Fixed initialisation of tf in file_open(). Without setting the memory to 0,
[cinelerra_cv/mob.git] / quicktime / ffmpeg / libavcodec / h264.c
blob7bef8ca8079773e1037b49a4597bbf80600e16a2
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264data.h"
#include "golomb.h"

#include "cabac.h"

#undef NDEBUG
#include <assert.h>
#include <stdint.h>

/*
 * Redirect two identifiers to names that are not declared anywhere in this
 * file, so that any use of them below is rewritten by the preprocessor.
 * NOTE(review): presumably this is meant to make accidental use of these
 * MpegEncContext fields a compile error -- confirm against mpegvideo.h.
 */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Indices used for the luma / chroma DC blocks. */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit widths of the VLC lookup tables declared further down. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Sizes of the parameter-set buffers held in H264Context. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Size of the memory management control operation buffer in H264Context. */
#define MAX_MMCO_COUNT 66

57 /**
58 * Sequence parameter set
60 typedef struct SPS{
62 int profile_idc;
63 int level_idc;
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
85 AVRational sar;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
88 uint32_t time_scale;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 }SPS;
/**
 * Picture parameter set
 *
 * Each field mirrors one syntax element of the picture parameter set; the
 * trailing comment names the bitstream element where the field name differs
 * from it or where the stored value is offset from the coded one.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    int transform_8x8_mode;     ///< transform_8x8_mode_flag
}PPS;

/**
 * Memory management control operation opcode.
 *
 * Enumerators are numbered consecutively from MMCO_END (0) to MMCO_LONG (6).
 */
typedef enum MMCOOpcode{
    MMCO_END=0,
    MMCO_SHORT2UNUSED,
    MMCO_LONG2UNUSED,
    MMCO_SHORT2LONG,
    MMCO_SET_MAX_LONG,
    MMCO_RESET,
    MMCO_LONG,
} MMCOOpcode;

130 * Memory management control operation.
132 typedef struct MMCO{
133 MMCOOpcode opcode;
134 int short_frame_num;
135 int long_index;
136 } MMCO;
139 * H264Context
141 typedef struct H264Context{
142 MpegEncContext s;
143 int nal_ref_idc;
144 int nal_unit_type;
145 #define NAL_SLICE 1
146 #define NAL_DPA 2
147 #define NAL_DPB 3
148 #define NAL_DPC 4
149 #define NAL_IDR_SLICE 5
150 #define NAL_SEI 6
151 #define NAL_SPS 7
152 #define NAL_PPS 8
153 #define NAL_PICTURE_DELIMITER 9
154 #define NAL_FILTER_DATA 10
155 uint8_t *rbsp_buffer;
156 int rbsp_buffer_size;
159 * Used to parse AVC variant of h264
161 int is_avc; ///< this flag is != 0 if codec is avc1
162 int got_avcC; ///< flag used to parse avcC data only once
163 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
165 int chroma_qp; //QPc
167 int prev_mb_skipped; //FIXME remove (IMHO not used)
169 //prediction stuff
170 int chroma_pred_mode;
171 int intra16x16_pred_mode;
173 int top_mb_xy;
174 int left_mb_xy[2];
176 int8_t intra4x4_pred_mode_cache[5*8];
177 int8_t (*intra4x4_pred_mode)[8];
178 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
179 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
180 void (*pred8x8 [4+3])(uint8_t *src, int stride);
181 void (*pred16x16[4+3])(uint8_t *src, int stride);
182 unsigned int topleft_samples_available;
183 unsigned int top_samples_available;
184 unsigned int topright_samples_available;
185 unsigned int left_samples_available;
186 uint8_t (*top_borders[2])[16+2*8];
187 uint8_t left_border[2*(17+2*9)];
190 * non zero coeff count cache.
191 * is 64 if not available.
193 uint8_t non_zero_count_cache[6*8] __align8;
194 uint8_t (*non_zero_count)[16];
197 * Motion vector cache.
199 int16_t mv_cache[2][5*8][2] __align8;
200 int8_t ref_cache[2][5*8] __align8;
201 #define LIST_NOT_USED -1 //FIXME rename?
202 #define PART_NOT_AVAILABLE -2
205 * is 1 if the specific list MV&references are set to 0,0,-2.
207 int mv_cache_clean[2];
210 * number of neighbors (top and/or left) that used 8x8 dct
212 int neighbor_transform_size;
215 * block_offset[ 0..23] for frame macroblocks
216 * block_offset[24..47] for field macroblocks
218 int block_offset[2*(16+8)];
220 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
221 uint32_t *mb2b8_xy;
222 int b_stride; //FIXME use s->b4_stride
223 int b8_stride;
225 int halfpel_flag;
226 int thirdpel_flag;
228 int unknown_svq3_flag;
229 int next_slice_index;
231 SPS sps_buffer[MAX_SPS_COUNT];
232 SPS sps; ///< current sps
234 PPS pps_buffer[MAX_PPS_COUNT];
236 * current pps
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
241 uint16_t (*dequant8_coeff)[64];
243 int slice_num;
244 uint8_t *slice_table_base;
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
246 int slice_type;
247 int slice_type_fixed;
249 //interlacing specific flags
250 int mb_aff_frame;
251 int mb_field_decoding_flag;
253 int sub_mb_type[4];
255 //POC stuff
256 int poc_lsb;
257 int poc_msb;
258 int delta_poc_bottom;
259 int delta_poc[2];
260 int frame_num;
261 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
262 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
263 int frame_num_offset; ///< for POC type 2
264 int prev_frame_num_offset; ///< for POC type 2
265 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
268 * frame_num for frames or 2*frame_num for field pics.
270 int curr_pic_num;
273 * max_frame_num or 2*max_frame_num for field pics.
275 int max_pic_num;
277 //Weighted pred stuff
278 int use_weight;
279 int use_weight_chroma;
280 int luma_log2_weight_denom;
281 int chroma_log2_weight_denom;
282 int luma_weight[2][16];
283 int luma_offset[2][16];
284 int chroma_weight[2][16][2];
285 int chroma_offset[2][16][2];
286 int implicit_weight[16][16];
288 //deblock
289 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
290 int slice_alpha_c0_offset;
291 int slice_beta_offset;
293 int redundant_pic_count;
295 int direct_spatial_mv_pred;
296 int dist_scale_factor[16];
297 int map_col_to_list0[2][16];
300 * num_ref_idx_l0/1_active_minus1 + 1
302 int ref_count[2];// FIXME split for AFF
303 Picture *short_ref[32];
304 Picture *long_ref[32];
305 Picture default_ref_list[2][32];
306 Picture ref_list[2][32]; //FIXME size?
307 Picture field_ref_list[2][32]; //FIXME size?
308 Picture *delayed_pic[16]; //FIXME size?
309 Picture *delayed_output_pic;
312 * memory management control operations buffer.
314 MMCO mmco[MAX_MMCO_COUNT];
315 int mmco_index;
317 int long_ref_count; ///< number of actual long term references
318 int short_ref_count; ///< number of actual short term references
320 //data partitioning
321 GetBitContext intra_gb;
322 GetBitContext inter_gb;
323 GetBitContext *intra_gb_ptr;
324 GetBitContext *inter_gb_ptr;
326 DCTELEM mb[16*24] __align8;
329 * Cabac
331 CABACContext cabac;
332 uint8_t cabac_state[460];
333 int cabac_init_idc;
335 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
336 uint16_t *cbp_table;
337 int top_cbp;
338 int left_cbp;
339 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 uint8_t *chroma_pred_mode_table;
341 int last_qscale_diff;
342 int16_t (*mvd_table[2])[2];
343 int16_t mvd_cache[2][5*8][2] __align8;
344 uint8_t *direct_table;
345 uint8_t direct_cache[5*8];
347 uint8_t zigzag_scan[16];
348 uint8_t field_scan[16];
349 const uint8_t *zigzag_scan_q0;
350 const uint8_t *field_scan_q0;
351 }H264Context;
353 static VLC coeff_token_vlc[4];
354 static VLC chroma_dc_coeff_token_vlc;
356 static VLC total_zeros_vlc[15];
357 static VLC chroma_dc_total_zeros_vlc[3];
359 static VLC run_vlc[6];
360 static VLC run7_vlc;
362 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
363 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
364 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs the low 16 bits of two integers into one 32 bit word.
 *
 * On little-endian builds a ends up in the low half and b in the high half;
 * with WORDS_BIGENDIAN defined the halves are swapped, so that in both cases
 * a is the 16 bit value stored at the lower memory address.
 *
 * @param a value whose low 16 bits are stored at the lower address
 * @param b value whose low 16 bits are stored at the higher address
 * @return the packed 32 bit word
 */
static inline uint32_t pack16to32(int a, int b){
    /* The shift is done on uint32_t: left-shifting a negative int is
     * undefined behaviour, and both arguments may be negative. */
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}

375 * fill a rectangle.
376 * @param h height of the rectangle, should be a constant
377 * @param w width of the rectangle, should be a constant
378 * @param size the size of val (1 or 4), should be a constant
380 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
381 uint8_t *p= (uint8_t*)vp;
382 assert(size==1 || size==4);
384 w *= size;
385 stride *= size;
387 assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
388 assert((stride&(w-1))==0);
389 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
390 if(w==2 && h==2){
391 *(uint16_t*)(p + 0)=
392 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
393 }else if(w==2 && h==4){
394 *(uint16_t*)(p + 0*stride)=
395 *(uint16_t*)(p + 1*stride)=
396 *(uint16_t*)(p + 2*stride)=
397 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
398 }else if(w==4 && h==1){
399 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
400 }else if(w==4 && h==2){
401 *(uint32_t*)(p + 0*stride)=
402 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
403 }else if(w==4 && h==4){
404 *(uint32_t*)(p + 0*stride)=
405 *(uint32_t*)(p + 1*stride)=
406 *(uint32_t*)(p + 2*stride)=
407 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
408 }else if(w==8 && h==1){
409 *(uint32_t*)(p + 0)=
410 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
411 }else if(w==8 && h==2){
412 *(uint32_t*)(p + 0 + 0*stride)=
413 *(uint32_t*)(p + 4 + 0*stride)=
414 *(uint32_t*)(p + 0 + 1*stride)=
415 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
416 }else if(w==8 && h==4){
417 *(uint64_t*)(p + 0*stride)=
418 *(uint64_t*)(p + 1*stride)=
419 *(uint64_t*)(p + 2*stride)=
420 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
421 }else if(w==16 && h==2){
422 *(uint64_t*)(p + 0+0*stride)=
423 *(uint64_t*)(p + 8+0*stride)=
424 *(uint64_t*)(p + 0+1*stride)=
425 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
426 }else if(w==16 && h==4){
427 *(uint64_t*)(p + 0+0*stride)=
428 *(uint64_t*)(p + 8+0*stride)=
429 *(uint64_t*)(p + 0+1*stride)=
430 *(uint64_t*)(p + 8+1*stride)=
431 *(uint64_t*)(p + 0+2*stride)=
432 *(uint64_t*)(p + 8+2*stride)=
433 *(uint64_t*)(p + 0+3*stride)=
434 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 }else
436 assert(0);
// fill_caches: loads the neighbour information of the macroblock at
// (s->mb_x, s->mb_y) into the per-macroblock caches of H264Context:
// sample availability masks, intra4x4 prediction modes, non-zero coefficient
// counts, top/left CBP, motion vectors and reference indices, mvd values,
// direct flags and neighbor_transform_size.
//   h           decoder context; caches are written, per-picture tables are read
//   mb_type     type flags of the current macroblock
//   for_deblock non-zero when called for the deblocking pass: the call returns
//               at once when h->slice_num == 1, neighbours are accepted from any
//               slice whose slice_table entry is < 255, and for every list the
//               steps after the top/left motion data are skipped
// NOTE(review): this listing has lost the lines that held only closing braces
// or comment delimiters; the code lines below are kept exactly as found.
439 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
440 MpegEncContext * const s = &h->s;
441 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
442 int topleft_xy, top_xy, topright_xy, left_xy[2];
443 int topleft_type, top_type, topright_type, left_type[2];
444 int left_block[8];
445 int i;
447 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
448 // the actual condition is whether we're on the edge of a slice,
449 // and even then the intra and nnz parts are unnecessary.
450 if(for_deblock && h->slice_num == 1)
451 return;
453 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default neighbour addresses: the macroblocks directly above, above-left,
// above-right and to the left of the current one.
455 top_xy = mb_xy - s->mb_stride;
456 topleft_xy = top_xy - 1;
457 topright_xy= top_xy + 1;
458 left_xy[1] = left_xy[0] = mb_xy-1;
// left_block[] selects which entries of the left neighbours' per-macroblock
// arrays are read further down: entries 0..3 index prediction modes, nnz and
// motion data, entries 4..7 index the additional nnz values.
459 left_block[0]= 0;
460 left_block[1]= 1;
461 left_block[2]= 2;
462 left_block[3]= 3;
463 left_block[4]= 7;
464 left_block[5]= 10;
465 left_block[6]= 8;
466 left_block[7]= 11;
// When h->mb_aff_frame is set, the neighbour addresses and left_block[] are
// recomputed from the frame/field flags of the current macroblock and of the
// neighbouring macroblock pairs.
467 if(h->mb_aff_frame){
468 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
469 const int top_pair_xy = pair_xy - s->mb_stride;
470 const int topleft_pair_xy = top_pair_xy - 1;
471 const int topright_pair_xy = top_pair_xy + 1;
472 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
473 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
474 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
475 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
476 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
477 const int bottom = (s->mb_y & 1);
478 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// The three tests below each move one of the upper neighbour addresses up by
// one more macroblock row.
479 if (bottom
480 ? !curr_mb_frame_flag // bottom macroblock
481 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
483 top_xy -= s->mb_stride;
485 if (bottom
486 ? !curr_mb_frame_flag // bottom macroblock
487 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
489 topleft_xy -= s->mb_stride;
491 if (bottom
492 ? !curr_mb_frame_flag // bottom macroblock
493 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
495 topright_xy -= s->mb_stride;
// Current and left pair differ in frame/field coding: both left addresses
// start at the top macroblock of the left pair and left_block[] is remapped.
497 if (left_mb_frame_flag != curr_mb_frame_flag) {
498 left_xy[1] = left_xy[0] = pair_xy - 1;
499 if (curr_mb_frame_flag) {
500 if (bottom) {
501 left_block[0]= 2;
502 left_block[1]= 2;
503 left_block[2]= 3;
504 left_block[3]= 3;
505 left_block[4]= 8;
506 left_block[5]= 11;
507 left_block[6]= 8;
508 left_block[7]= 11;
509 } else {
510 left_block[0]= 0;
511 left_block[1]= 0;
512 left_block[2]= 1;
513 left_block[3]= 1;
514 left_block[4]= 7;
515 left_block[5]= 10;
516 left_block[6]= 7;
517 left_block[7]= 10;
519 } else {
// second left neighbour becomes the bottom macroblock of the left pair
520 left_xy[1] += s->mb_stride;
521 //left_block[0]= 0;
522 left_block[1]= 2;
523 left_block[2]= 0;
524 left_block[3]= 2;
525 //left_block[4]= 7;
526 left_block[5]= 10;
527 left_block[6]= 7;
528 left_block[7]= 10;
// Publish the chosen top/left addresses in the context.
533 h->top_mb_xy = top_xy;
534 h->left_mb_xy[0] = left_xy[0];
535 h->left_mb_xy[1] = left_xy[1];
// Neighbour types; 0 marks a neighbour that must not be used. For deblocking
// every macroblock whose slice_table entry is < 255 qualifies, otherwise only
// macroblocks whose slice_table entry equals h->slice_num.
536 if(for_deblock){
537 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
538 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
539 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
540 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
541 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
542 }else{
543 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
544 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
545 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
546 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
547 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra macroblock: start from fully set availability masks and clear bits
// for every neighbour that is missing, or that is non-intra while
// constrained_intra_pred is on. The bit-to-block mapping of the masks is not
// visible in this function.
550 if(IS_INTRA(mb_type)){
551 h->topleft_samples_available=
552 h->top_samples_available=
553 h->left_samples_available= 0xFFFF;
554 h->topright_samples_available= 0xEEEA;
556 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
557 h->topleft_samples_available= 0xB3FF;
558 h->top_samples_available= 0x33FF;
559 h->topright_samples_available= 0x26EA;
561 for(i=0; i<2; i++){
562 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
563 h->topleft_samples_available&= 0xDF5F;
564 h->left_samples_available&= 0x5F5F;
568 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
569 h->topleft_samples_available&= 0x7FFF;
571 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
572 h->topright_samples_available&= 0xFBFF;
// Intra4x4 macroblock: load the neighbours' prediction modes into the top row
// and left column of intra4x4_pred_mode_cache. A neighbour that is not
// intra4x4 contributes -1 when it is missing (or inter with
// constrained_intra_pred), otherwise 2.
// NOTE(review): 2 presumably is DC_PRED -- confirm against the mode enum.
574 if(IS_INTRA4x4(mb_type)){
575 if(IS_INTRA4x4(top_type)){
576 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
577 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
578 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
579 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
580 }else{
581 int pred;
582 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
583 pred= -1;
584 else{
585 pred= 2;
587 h->intra4x4_pred_mode_cache[4+8*0]=
588 h->intra4x4_pred_mode_cache[5+8*0]=
589 h->intra4x4_pred_mode_cache[6+8*0]=
590 h->intra4x4_pred_mode_cache[7+8*0]= pred;
592 for(i=0; i<2; i++){
593 if(IS_INTRA4x4(left_type[i])){
594 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
595 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
596 }else{
597 int pred;
598 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
599 pred= -1;
600 else{
601 pred= 2;
603 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
604 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// NOTE(review): the six lines below look like the body of a block comment
// (a sketch of which cache cells are loaded from the top, T, and left, L,
// neighbours) whose delimiters were lost -- confirm against the original file.
612 0 . T T. T T T T
613 1 L . .L . . . .
614 2 L . .L . . . .
615 3 . T TL . . . .
616 4 L . .L . . . .
617 5 L . .. . . . .
619 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Non-zero coefficient counts of the top neighbour go into the cache cells
// above the current blocks; when there is no top neighbour the cells are set
// to 0 (CABAC and current macroblock not intra) or 64.
620 if(top_type){
621 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
622 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
623 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
624 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
626 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
627 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
629 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
630 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
632 }else{
633 h->non_zero_count_cache[4+8*0]=
634 h->non_zero_count_cache[5+8*0]=
635 h->non_zero_count_cache[6+8*0]=
636 h->non_zero_count_cache[7+8*0]=
638 h->non_zero_count_cache[1+8*0]=
639 h->non_zero_count_cache[2+8*0]=
641 h->non_zero_count_cache[1+8*3]=
642 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Same for the two left neighbours, using left_block[] to pick the entries.
646 for (i=0; i<2; i++) {
647 if(left_type[i]){
648 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
649 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
650 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
651 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
652 }else{
653 h->non_zero_count_cache[3+8*1 + 2*8*i]=
654 h->non_zero_count_cache[3+8*2 + 2*8*i]=
655 h->non_zero_count_cache[0+8*1 + 8*i]=
656 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CABAC only: load top_cbp and left_cbp from cbp_table. A missing neighbour
// yields 0x1C0 when the current macroblock is intra and 0 otherwise.
660 if( h->pps.cabac ) {
661 // top_cbp
662 if(top_type) {
663 h->top_cbp = h->cbp_table[top_xy];
664 } else if(IS_INTRA(mb_type)) {
665 h->top_cbp = 0x1C0;
666 } else {
667 h->top_cbp = 0;
669 // left_cbp
670 if (left_type[0]) {
671 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
672 } else if(IS_INTRA(mb_type)) {
673 h->left_cbp = 0x1C0;
674 } else {
675 h->left_cbp = 0;
// bits 1 and 3 of left_cbp are taken from one cbp_table bit of each left
// neighbour, selected through left_block[0] and left_block[2]
677 if (left_type[0]) {
678 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
680 if (left_type[1]) {
681 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter or direct macroblock: fill the border cells of mv_cache and ref_cache
// (and, further down, mvd_cache and direct_cache) for list 0, plus list 1
// in B slices.
685 #if 1
686 //FIXME direct mb can skip much of this
687 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
688 int list;
689 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
// a list the macroblock does not use is skipped, unless the macroblock is
// direct or the deblocking filter is enabled
690 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
691 /*if(!h->mv_cache_clean[list]){
692 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
693 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
694 h->mv_cache_clean[list]= 1;
696 continue;
698 h->mv_cache_clean[list]= 0;
// top neighbour: bottom row of its motion vectors and reference indices;
// a non-inter neighbour gives zero vectors and LIST_NOT_USED, a missing
// one gives zero vectors and PART_NOT_AVAILABLE
700 if(IS_INTER(top_type)){
701 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
702 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
703 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
704 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
705 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
706 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
707 h->ref_cache[list][scan8[0] + 0 - 1*8]=
708 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
709 h->ref_cache[list][scan8[0] + 2 - 1*8]=
710 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
711 }else{
712 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
713 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
714 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
715 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
716 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// first left neighbour: fills the left border cells of cache rows 0 and 1
719 //FIXME unify cleanup or sth
720 if(IS_INTER(left_type[0])){
721 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
722 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
723 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
724 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
725 h->ref_cache[list][scan8[0] - 1 + 0*8]=
726 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
727 }else{
728 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
729 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
730 h->ref_cache[list][scan8[0] - 1 + 0*8]=
731 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// second left neighbour: fills the left border cells of cache rows 2 and 3;
// the fallback tests left_type[0], which the assert below requires to agree
// with left_type[1] on availability
734 if(IS_INTER(left_type[1])){
735 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
736 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
737 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
739 h->ref_cache[list][scan8[0] - 1 + 2*8]=
740 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
741 }else{
742 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
743 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
744 h->ref_cache[list][scan8[0] - 1 + 2*8]=
745 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
746 assert((!left_type[0]) == (!left_type[1]));
// deblocking, and direct macroblocks without spatial prediction, stop here
// for this list
749 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
750 continue;
// top-left and top-right corner cells
752 if(IS_INTER(topleft_type)){
753 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
754 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
756 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
757 }else{
758 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
759 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
762 if(IS_INTER(topright_type)){
763 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
764 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
765 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
766 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
767 }else{
768 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
769 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// five fixed cache cells are always marked PART_NOT_AVAILABLE with a zero vector
773 h->ref_cache[list][scan8[5 ]+1] =
774 h->ref_cache[list][scan8[7 ]+1] =
775 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
776 h->ref_cache[list][scan8[4 ]] =
777 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
778 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
779 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
780 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
781 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
782 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC only: the same border cells of mvd_cache are loaded from mvd_table,
// or zeroed when the neighbour is not inter
784 if( h->pps.cabac ) {
785 /* XXX beurk, Load mvd */
786 if(IS_INTER(topleft_type)){
787 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
788 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
789 }else{
790 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
793 if(IS_INTER(top_type)){
794 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
795 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
796 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
797 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
798 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
799 }else{
800 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
801 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
802 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
803 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
805 if(IS_INTER(left_type[0])){
806 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
807 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
809 }else{
810 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
811 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
813 if(IS_INTER(left_type[1])){
814 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
815 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
816 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
817 }else{
818 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
819 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
821 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
822 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
823 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
824 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
825 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: clear the 4x4 body of direct_cache, then set its top row and left
// column to 1 for direct neighbours, to the direct_table entries for 8x8
// neighbours and to 0 otherwise
827 if(h->slice_type == B_TYPE){
828 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
830 if(IS_DIRECT(top_type)){
831 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
832 }else if(IS_8X8(top_type)){
833 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
834 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
835 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
836 }else{
837 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
840 //FIXME interlacing
841 if(IS_DIRECT(left_type[0])){
842 h->direct_cache[scan8[0] - 1 + 0*8]=
843 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
844 }else if(IS_8X8(left_type[0])){
845 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
846 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
847 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
848 }else{
849 h->direct_cache[scan8[0] - 1 + 0*8]=
850 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
856 #endif
// number (0..2) of the top and first left neighbours flagged as using the 8x8 transform
858 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
861 static inline void write_back_intra_pred_mode(H264Context *h){
862 MpegEncContext * const s = &h->s;
863 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
865 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
866 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
867 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
868 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
869 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
870 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
871 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
875 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
877 static inline int check_intra4x4_pred_mode(H264Context *h){
878 MpegEncContext * const s = &h->s;
879 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
880 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
881 int i;
883 if(!(h->top_samples_available&0x8000)){
884 for(i=0; i<4; i++){
885 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
886 if(status<0){
887 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
888 return -1;
889 } else if(status){
890 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
895 if(!(h->left_samples_available&0x8000)){
896 for(i=0; i<4; i++){
897 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
898 if(status<0){
899 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
900 return -1;
901 } else if(status){
902 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
907 return 0;
908 } //FIXME cleanup like next
911 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
913 static inline int check_intra_pred_mode(H264Context *h, int mode){
914 MpegEncContext * const s = &h->s;
915 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
916 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
918 if(mode < 0 || mode > 6) {
919 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
920 return -1;
923 if(!(h->top_samples_available&0x8000)){
924 mode= top[ mode ];
925 if(mode<0){
926 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
927 return -1;
931 if(!(h->left_samples_available&0x8000)){
932 mode= left[ mode ];
933 if(mode<0){
934 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
935 return -1;
939 return mode;
943 * gets the predicted intra4x4 prediction mode.
945 static inline int pred_intra_mode(H264Context *h, int n){
946 const int index8= scan8[n];
947 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
948 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
949 const int min= FFMIN(left, top);
951 tprintf("mode:%d %d min:%d\n", left ,top, min);
953 if(min<0) return DC_PRED;
954 else return min;
957 static inline void write_back_non_zero_count(H264Context *h){
958 MpegEncContext * const s = &h->s;
959 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
961 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
962 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
963 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
964 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
965 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
966 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
967 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
969 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
970 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
971 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
973 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
974 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
975 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
979 * gets the predicted number of non zero coefficients.
980 * @param n block index
982 static inline int pred_non_zero_count(H264Context *h, int n){
983 const int index8= scan8[n];
984 const int left= h->non_zero_count_cache[index8 - 1];
985 const int top = h->non_zero_count_cache[index8 - 8];
986 int i= left + top;
988 if(i<64) i= (i+1)>>1;
990 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
992 return i&31;
995 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
996 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
998 if(topright_ref != PART_NOT_AVAILABLE){
999 *C= h->mv_cache[list][ i - 8 + part_width ];
1000 return topright_ref;
1001 }else{
1002 tprintf("topright MV not available\n");
1004 *C= h->mv_cache[list][ i - 8 - 1 ];
1005 return h->ref_cache[list][ i - 8 - 1 ];
1010 * gets the predicted MV.
1011 * @param n the block index
1012 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1013 * @param mx the x component of the predicted motion vector
1014 * @param my the y component of the predicted motion vector
1016 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1017 const int index8= scan8[n];
1018 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1019 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1020 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1021 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1022 const int16_t * C;
1023 int diagonal_ref, match_count;
1025 assert(part_width==1 || part_width==2 || part_width==4);
1027 /* mv_cache
1028 B . . A T T T T
1029 U . . L . . , .
1030 U . . L . . . .
1031 U . . L . . , .
1032 . . . L . . . .
1035 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1036 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1037 tprintf("pred_motion match_count=%d\n", match_count);
1038 if(match_count > 1){ //most common
1039 *mx= mid_pred(A[0], B[0], C[0]);
1040 *my= mid_pred(A[1], B[1], C[1]);
1041 }else if(match_count==1){
1042 if(left_ref==ref){
1043 *mx= A[0];
1044 *my= A[1];
1045 }else if(top_ref==ref){
1046 *mx= B[0];
1047 *my= B[1];
1048 }else{
1049 *mx= C[0];
1050 *my= C[1];
1052 }else{
1053 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1054 *mx= A[0];
1055 *my= A[1];
1056 }else{
1057 *mx= mid_pred(A[0], B[0], C[0]);
1058 *my= mid_pred(A[1], B[1], C[1]);
1062 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1066 * gets the directionally predicted 16x8 MV.
1067 * @param n the block index
1068 * @param mx the x component of the predicted motion vector
1069 * @param my the y component of the predicted motion vector
1071 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1072 if(n==0){
1073 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1074 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1076 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1078 if(top_ref == ref){
1079 *mx= B[0];
1080 *my= B[1];
1081 return;
1083 }else{
1084 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1085 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1087 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1089 if(left_ref == ref){
1090 *mx= A[0];
1091 *my= A[1];
1092 return;
1096 //RARE
1097 pred_motion(h, n, 4, list, ref, mx, my);
1101 * gets the directionally predicted 8x16 MV.
1102 * @param n the block index
1103 * @param mx the x component of the predicted motion vector
1104 * @param my the y component of the predicted motion vector
1106 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1107 if(n==0){
1108 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1109 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1111 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1113 if(left_ref == ref){
1114 *mx= A[0];
1115 *my= A[1];
1116 return;
1118 }else{
1119 const int16_t * C;
1120 int diagonal_ref;
1122 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1124 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1126 if(diagonal_ref == ref){
1127 *mx= C[0];
1128 *my= C[1];
1129 return;
1133 //RARE
1134 pred_motion(h, n, 2, list, ref, mx, my);
1137 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1138 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1139 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1141 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1143 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1144 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1145 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1147 *mx = *my = 0;
1148 return;
1151 pred_motion(h, 0, 4, 0, 0, mx, my);
1153 return;
1156 static inline void direct_dist_scale_factor(H264Context * const h){
1157 const int poc = h->s.current_picture_ptr->poc;
1158 const int poc1 = h->ref_list[1][0].poc;
1159 int i;
1160 for(i=0; i<h->ref_count[0]; i++){
1161 int poc0 = h->ref_list[0][i].poc;
1162 int td = clip(poc1 - poc0, -128, 127);
1163 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1164 h->dist_scale_factor[i] = 256;
1165 }else{
1166 int tb = clip(poc - poc0, -128, 127);
1167 int tx = (16384 + (ABS(td) >> 1)) / td;
1168 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1172 static inline void direct_ref_list_init(H264Context * const h){
1173 MpegEncContext * const s = &h->s;
1174 Picture * const ref1 = &h->ref_list[1][0];
1175 Picture * const cur = s->current_picture_ptr;
1176 int list, i, j;
1177 if(cur->pict_type == I_TYPE)
1178 cur->ref_count[0] = 0;
1179 if(cur->pict_type != B_TYPE)
1180 cur->ref_count[1] = 0;
1181 for(list=0; list<2; list++){
1182 cur->ref_count[list] = h->ref_count[list];
1183 for(j=0; j<h->ref_count[list]; j++)
1184 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1186 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1187 return;
1188 for(list=0; list<2; list++){
1189 for(i=0; i<ref1->ref_count[list]; i++){
1190 const int poc = ref1->ref_poc[list][i];
1191 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1192 for(j=0; j<h->ref_count[list]; j++)
1193 if(h->ref_list[list][j].poc == poc){
1194 h->map_col_to_list0[list][i] = j;
1195 break;
1201 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1202 MpegEncContext * const s = &h->s;
1203 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1204 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1205 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1206 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1207 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1208 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1209 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1210 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1211 const int is_b8x8 = IS_8X8(*mb_type);
1212 int sub_mb_type;
1213 int i8, i4;
1215 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1216 /* FIXME save sub mb types from previous frames (or derive from MVs)
1217 * so we know exactly what block size to use */
1218 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1219 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1220 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1221 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1222 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1223 }else{
1224 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1225 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1227 if(!is_b8x8)
1228 *mb_type |= MB_TYPE_DIRECT2;
1230 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1232 if(h->direct_spatial_mv_pred){
1233 int ref[2];
1234 int mv[2][2];
1235 int list;
1237 /* ref = min(neighbors) */
1238 for(list=0; list<2; list++){
1239 int refa = h->ref_cache[list][scan8[0] - 1];
1240 int refb = h->ref_cache[list][scan8[0] - 8];
1241 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1242 if(refc == -2)
1243 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1244 ref[list] = refa;
1245 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1246 ref[list] = refb;
1247 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1248 ref[list] = refc;
1249 if(ref[list] < 0)
1250 ref[list] = -1;
1253 if(ref[0] < 0 && ref[1] < 0){
1254 ref[0] = ref[1] = 0;
1255 mv[0][0] = mv[0][1] =
1256 mv[1][0] = mv[1][1] = 0;
1257 }else{
1258 for(list=0; list<2; list++){
1259 if(ref[list] >= 0)
1260 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1261 else
1262 mv[list][0] = mv[list][1] = 0;
1266 if(ref[1] < 0){
1267 *mb_type &= ~MB_TYPE_P0L1;
1268 sub_mb_type &= ~MB_TYPE_P0L1;
1269 }else if(ref[0] < 0){
1270 *mb_type &= ~MB_TYPE_P0L0;
1271 sub_mb_type &= ~MB_TYPE_P0L0;
1274 if(IS_16X16(*mb_type)){
1275 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1276 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
1277 if(!IS_INTRA(mb_type_col)
1278 && ( l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1
1279 || l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1)){
1280 if(ref[0] > 0)
1281 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1282 else
1283 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1284 if(ref[1] > 0)
1285 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1286 else
1287 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1288 }else{
1289 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1290 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1292 }else{
1293 for(i8=0; i8<4; i8++){
1294 const int x8 = i8&1;
1295 const int y8 = i8>>1;
1297 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1298 continue;
1299 h->sub_mb_type[i8] = sub_mb_type;
1301 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1302 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1303 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1304 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
1306 /* col_zero_flag */
1307 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1308 || l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0)){
1309 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1310 for(i4=0; i4<4; i4++){
1311 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1312 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1313 if(ref[0] == 0)
1314 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1315 if(ref[1] == 0)
1316 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1322 }else{ /* direct temporal mv pred */
1323 if(IS_16X16(*mb_type)){
1324 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1325 if(IS_INTRA(mb_type_col)){
1326 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1327 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1328 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1329 }else{
1330 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1331 : h->map_col_to_list0[1][l1ref1[0]];
1332 const int dist_scale_factor = h->dist_scale_factor[ref0];
1333 const int16_t *mv_col = l1mv0[0];
1334 int mv_l0[2];
1335 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1336 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1337 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1338 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1339 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1341 }else{
1342 for(i8=0; i8<4; i8++){
1343 const int x8 = i8&1;
1344 const int y8 = i8>>1;
1345 int ref0, dist_scale_factor;
1347 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1348 continue;
1349 h->sub_mb_type[i8] = sub_mb_type;
1350 if(IS_INTRA(mb_type_col)){
1351 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1352 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1353 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1354 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1355 continue;
1358 ref0 = l1ref0[x8 + y8*h->b8_stride];
1359 if(ref0 >= 0)
1360 ref0 = h->map_col_to_list0[0][ref0];
1361 else
1362 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1363 dist_scale_factor = h->dist_scale_factor[ref0];
1365 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1366 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1367 for(i4=0; i4<4; i4++){
1368 const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1369 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1370 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1371 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1372 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1373 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1380 static inline void write_back_motion(H264Context *h, int mb_type){
1381 MpegEncContext * const s = &h->s;
1382 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1383 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1384 int list;
1386 for(list=0; list<2; list++){
1387 int y;
1388 if(!USES_LIST(mb_type, list)){
1389 if(1){ //FIXME skip or never read if mb_type doesn't use it
1390 for(y=0; y<4; y++){
1391 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1392 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1394 if( h->pps.cabac ) {
1395 /* FIXME needed ? */
1396 for(y=0; y<4; y++){
1397 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1398 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1401 for(y=0; y<2; y++){
1402 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1403 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
1406 continue;
1409 for(y=0; y<4; y++){
1410 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1411 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1413 if( h->pps.cabac ) {
1414 for(y=0; y<4; y++){
1415 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1416 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1419 for(y=0; y<2; y++){
1420 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1421 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
1425 if(h->slice_type == B_TYPE && h->pps.cabac){
1426 if(IS_8X8(mb_type)){
1427 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1428 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1429 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1435 * Decodes a network abstraction layer unit.
1436 * @param consumed is the number of bytes used as input
1437 * @param length is the length of the array
1438 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1439 * @returns decoded bytes, might be src+1 if no escapes
1441 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1442 int i, si, di;
1443 uint8_t *dst;
1445 // src[0]&0x80; //forbidden bit
1446 h->nal_ref_idc= src[0]>>5;
1447 h->nal_unit_type= src[0]&0x1F;
1449 src++; length--;
1450 #if 0
1451 for(i=0; i<length; i++)
1452 printf("%2X ", src[i]);
1453 #endif
1454 for(i=0; i+1<length; i+=2){
1455 if(src[i]) continue;
1456 if(i>0 && src[i-1]==0) i--;
1457 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1458 if(src[i+2]!=3){
1459 /* startcode, so we must be past the end */
1460 length=i;
1462 break;
1466 if(i>=length-1){ //no escaped 0
1467 *dst_length= length;
1468 *consumed= length+1; //+1 for the header
1469 return src;
1472 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1473 dst= h->rbsp_buffer;
1475 //printf("decoding esc\n");
1476 si=di=0;
1477 while(si<length){
1478 //remove escapes (very rare 1:2^22)
1479 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1480 if(src[si+2]==3){ //escape
1481 dst[di++]= 0;
1482 dst[di++]= 0;
1483 si+=3;
1484 continue;
1485 }else //next start code
1486 break;
1489 dst[di++]= src[si++];
1492 *dst_length= di;
1493 *consumed= si + 1;//+1 for the header
1494 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1495 return dst;
1498 #if 0
// Everything up to the matching #endif is compiled out (encoder side).
// encode_nal() writes the NAL header byte to dst[0] and copies src behind it,
// inserting a 0x03 byte after every 00 00 that is followed by a byte <= 3.
1500 * @param src the data which should be escaped
1501 * @param dst the target buffer, dst+1 == src is allowed as a special case
1502 * @param length the length of the src data
1503 * @param dst_length the length of the dst array
1504 * @returns length of escaped data in bytes or -1 if an error occured
1506 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1507 int i, escape_count, si, di;
1508 uint8_t *temp;
1510 assert(length>=0);
1511 assert(dst_length>0);
1513 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1515 if(length==0) return 1;
// first pass: count the sequences that need an escape byte
1517 escape_count= 0;
1518 for(i=0; i<length; i+=2){
1519 if(src[i]) continue;
1520 if(i>0 && src[i-1]==0)
1521 i--;
1522 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1523 escape_count++;
1524 i+=2;
// nothing to escape: plain copy (skipped when src already sits at dst+1)
1528 if(escape_count==0){
1529 if(dst+1 != src)
1530 memcpy(dst+1, src, length);
1531 return length + 1;
1534 if(length + escape_count + 1> dst_length)
1535 return -1;
1537 //this should be damn rare (hopefully)
// NOTE(review): the av_fast_realloc() result is used without a NULL check
1539 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1540 temp= h->rbsp_buffer;
1541 //printf("encoding esc\n");
// second pass: build the escaped payload in temp, then copy it to dst+1
1543 si= 0;
1544 di= 0;
1545 while(si < length){
1546 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1547 temp[di++]= 0; si++;
1548 temp[di++]= 0; si++;
1549 temp[di++]= 3;
1550 temp[di++]= src[si++];
1552 else
1553 temp[di++]= src[si++];
1555 memcpy(dst+1, temp, length+escape_count);
1557 assert(di == length+escape_count);
1559 return di + 1;
1563 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
// encode_rbsp_trailing(): a 1 bit followed by zero bits up to the next byte boundary
1565 static void encode_rbsp_trailing(PutBitContext *pb){
1566 int length;
1567 put_bits(pb, 1, 1);
1568 length= (-put_bits_count(pb))&7;
1569 if(length) put_bits(pb, length, 0);
1571 #endif
/**
 * identifies the exact end of the bitstream
 * Looks for the lowest set bit of the byte at src.
 * @return the length of the trailing (1-based position of that bit, counted
 *         from the least significant bit), or 0 if damaged (no bit set)
 */
static int decode_rbsp_trailing(uint8_t *src){
    const int v= *src;
    int bit;

    tprintf("rbsp trailing %X\n", v);

    for(bit=0; bit<8; bit++){
        if((v>>bit)&1)
            return bit+1;
    }
    return 0;
}
1591 * idct tranforms the 16 dc values and dequantize them.
1592 * @param qp quantization parameter
1594 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1595 const int qmul= dequant_coeff[qp][0];
1596 #define stride 16
1597 int i;
1598 int temp[16]; //FIXME check if this is a good idea
1599 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1600 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1602 //memset(block, 64, 2*256);
1603 //return;
1604 for(i=0; i<4; i++){
1605 const int offset= y_offset[i];
1606 const int z0= block[offset+stride*0] + block[offset+stride*4];
1607 const int z1= block[offset+stride*0] - block[offset+stride*4];
1608 const int z2= block[offset+stride*1] - block[offset+stride*5];
1609 const int z3= block[offset+stride*1] + block[offset+stride*5];
1611 temp[4*i+0]= z0+z3;
1612 temp[4*i+1]= z1+z2;
1613 temp[4*i+2]= z1-z2;
1614 temp[4*i+3]= z0-z3;
1617 for(i=0; i<4; i++){
1618 const int offset= x_offset[i];
1619 const int z0= temp[4*0+i] + temp[4*2+i];
1620 const int z1= temp[4*0+i] - temp[4*2+i];
1621 const int z2= temp[4*1+i] - temp[4*3+i];
1622 const int z3= temp[4*1+i] + temp[4*3+i];
1624 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1625 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1626 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1627 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1631 #if 0
// Compiled out. Forward counterpart of h264_luma_dc_dequant_idct_c(): the same
// two butterfly passes over the 16 DC positions, results halved, no quantization.
// Relies on the "stride" macro still being defined from the function above.
1633 * dct tranforms the 16 dc values.
1634 * @param qp quantization parameter ??? FIXME
1636 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1637 // const int qmul= dequant_coeff[qp][0];
1638 int i;
1639 int temp[16]; //FIXME check if this is a good idea
1640 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1641 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// first pass: block -> temp
1643 for(i=0; i<4; i++){
1644 const int offset= y_offset[i];
1645 const int z0= block[offset+stride*0] + block[offset+stride*4];
1646 const int z1= block[offset+stride*0] - block[offset+stride*4];
1647 const int z2= block[offset+stride*1] - block[offset+stride*5];
1648 const int z3= block[offset+stride*1] + block[offset+stride*5];
1650 temp[4*i+0]= z0+z3;
1651 temp[4*i+1]= z1+z2;
1652 temp[4*i+2]= z1-z2;
1653 temp[4*i+3]= z0-z3;
// second pass: temp -> block, each result shifted right by 1
1656 for(i=0; i<4; i++){
1657 const int offset= x_offset[i];
1658 const int z0= temp[4*0+i] + temp[4*2+i];
1659 const int z1= temp[4*0+i] - temp[4*2+i];
1660 const int z2= temp[4*1+i] - temp[4*3+i];
1661 const int z3= temp[4*1+i] + temp[4*3+i];
1663 block[stride*0 +offset]= (z0 + z3)>>1;
1664 block[stride*2 +offset]= (z1 + z2)>>1;
1665 block[stride*8 +offset]= (z1 - z2)>>1;
1666 block[stride*10+offset]= (z0 - z3)>>1;
1669 #endif
1671 #undef xStride
1672 #undef stride
1674 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1675 const int qmul= dequant_coeff[qp][0];
1676 const int stride= 16*2;
1677 const int xStride= 16;
1678 int a,b,c,d,e;
1680 a= block[stride*0 + xStride*0];
1681 b= block[stride*0 + xStride*1];
1682 c= block[stride*1 + xStride*0];
1683 d= block[stride*1 + xStride*1];
1685 e= a-b;
1686 a= a+b;
1687 b= c-d;
1688 c= c+d;
1690 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1691 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1692 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1693 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
1696 #if 0
// Compiled out. Forward 2x2 transform of the chroma DC values at block[0],
// block[16], block[32] and block[48]; no scaling or quantization.
1697 static void chroma_dc_dct_c(DCTELEM *block){
1698 const int stride= 16*2;
1699 const int xStride= 16;
1700 int a,b,c,d,e;
1702 a= block[stride*0 + xStride*0];
1703 b= block[stride*0 + xStride*1];
1704 c= block[stride*1 + xStride*0];
1705 d= block[stride*1 + xStride*1];
// a/e become sum/difference of the first row, c/b sum/difference of the second row
1707 e= a-b;
1708 a= a+b;
1709 b= c-d;
1710 c= c+d;
1712 block[stride*0 + xStride*0]= (a+c);
1713 block[stride*0 + xStride*1]= (e+b);
1714 block[stride*1 + xStride*0]= (a-c);
1715 block[stride*1 + xStride*1]= (e-b);
1717 #endif
1720 * gets the chroma qp.
1722 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1724 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
1728 #if 0
// Compiled out. Computes the 4x4 difference src1 - src2 and transforms it into
// block (16 coefficients, 4 per row) with two butterfly passes.
1729 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1730 int i;
1731 //FIXME try int temp instead of block
// first pass: one row of pixel differences per iteration
1733 for(i=0; i<4; i++){
1734 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1735 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1736 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1737 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1738 const int z0= d0 + d3;
1739 const int z3= d0 - d3;
1740 const int z1= d1 + d2;
1741 const int z2= d1 - d2;
1743 block[0 + 4*i]= z0 + z1;
1744 block[1 + 4*i]= 2*z3 + z2;
1745 block[2 + 4*i]= z0 - z1;
1746 block[3 + 4*i]= z3 - 2*z2;
// second pass: the same butterfly down each column of block
1749 for(i=0; i<4; i++){
1750 const int z0= block[0*4 + i] + block[3*4 + i];
1751 const int z3= block[0*4 + i] - block[3*4 + i];
1752 const int z1= block[1*4 + i] + block[2*4 + i];
1753 const int z2= block[1*4 + i] - block[2*4 + i];
1755 block[0*4 + i]= z0 + z1;
1756 block[1*4 + i]= 2*z3 + z2;
1757 block[2*4 + i]= z0 - z1;
1758 block[3*4 + i]= z3 - 2*z2;
1761 #endif
1763 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1764 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1765 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1766 int i;
1767 const int * const quant_table= quant_coeff[qscale];
1768 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1769 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1770 const unsigned int threshold2= (threshold1<<1);
1771 int last_non_zero;
1773 if(seperate_dc){
1774 if(qscale<=18){
1775 //avoid overflows
1776 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1777 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1778 const unsigned int dc_threshold2= (dc_threshold1<<1);
1780 int level= block[0]*quant_coeff[qscale+18][0];
1781 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1782 if(level>0){
1783 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1784 block[0]= level;
1785 }else{
1786 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1787 block[0]= -level;
1789 // last_non_zero = i;
1790 }else{
1791 block[0]=0;
1793 }else{
1794 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1795 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1796 const unsigned int dc_threshold2= (dc_threshold1<<1);
1798 int level= block[0]*quant_table[0];
1799 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1800 if(level>0){
1801 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1802 block[0]= level;
1803 }else{
1804 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1805 block[0]= -level;
1807 // last_non_zero = i;
1808 }else{
1809 block[0]=0;
1812 last_non_zero= 0;
1813 i=1;
1814 }else{
1815 last_non_zero= -1;
1816 i=0;
1819 for(; i<16; i++){
1820 const int j= scantable[i];
1821 int level= block[j]*quant_table[j];
1823 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1824 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1825 if(((unsigned)(level+threshold1))>threshold2){
1826 if(level>0){
1827 level= (bias + level)>>QUANT_SHIFT;
1828 block[j]= level;
1829 }else{
1830 level= (bias - level)>>QUANT_SHIFT;
1831 block[j]= -level;
1833 last_non_zero = i;
1834 }else{
1835 block[j]=0;
1839 return last_non_zero;
/**
 * 4x4 vertical intra prediction: the four pixels directly above the block
 * are copied into each of its four rows (one 32 bit access per row).
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t above= *(uint32_t*)(src-stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src+y*stride)= above;
}
/**
 * 4x4 horizontal intra prediction: every row is filled with the pixel
 * directly to its left.
 * The pixel is replicated into all four bytes with an unsigned multiply; a
 * signed one overflows int for pixel values >= 128.
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++){
        uint8_t * const row= src + y*stride;
        ((uint32_t*)row)[0]= row[-1]*0x01010101U;
    }
}
/**
 * 4x4 DC intra prediction: the block is filled with the rounded average of
 * the four pixels above and the four pixels to the left.
 * The average is replicated into all four bytes with an unsigned multiply; a
 * signed one overflows int for averages >= 128.
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction from the left edge only: the block is filled with
 * the rounded average of the four pixels to its left.
 * The average is replicated into all four bytes with an unsigned multiply; a
 * signed one overflows int for averages >= 128.
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction from the top edge only: the block is filled with
 * the rounded average of the four pixels above it.
 * The average is replicated into all four bytes with an unsigned multiply; a
 * signed one overflows int for averages >= 128.
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction without any neighbours: the block is filled with
 * the constant 128.
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/* Edge loaders for the 4x4 intra predictors. Each macro declares four
 * "const int" locals and expects "src", "stride" and (for the top right
 * edge) "topright" to be in scope at the point of use. */

/* t4..t7: the four pixels above and to the right of the block */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

/* l0..l3: the four pixels to the left of the block, top to bottom */
#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];

/* t0..t3: the four pixels above the block, left to right */
#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];

/**
 * 4x4 diagonal down-right intra prediction. Every pixel on a down-right
 * diagonal gets the same (1,2,1)-filtered value of three consecutive
 * neighbours taken from the left column, the top-left corner and the top row.
 * @param src      top-left pixel of the block
 * @param topright not used by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* neighbours walked from bottom-left, through the corner, to top-right */
    const int edge[9]= {
        src[-1+3*stride], src[-1+2*stride], src[-1+1*stride], src[-1+0*stride],
        src[-1-1*stride],
        src[ 0-1*stride], src[ 1-1*stride], src[ 2-1*stride], src[ 3-1*stride]
    };
    int x, y;

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int k= 3 + x - y; /* index of the first filter tap */
            src[x+y*stride]= (edge[k] + 2*edge[k+1] + edge[k+2] + 2)>>2;
        }
    }
}
/**
 * 4x4 diagonal down-left intra prediction. Every pixel on a down-left
 * diagonal gets the same (1,2,1)-filtered value of three consecutive pixels
 * of the top / top-right row; the last diagonal reuses the final pixel.
 * @param src      top-left pixel of the block
 * @param topright four pixels continuing the row above the block to the right
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    const int top[8]= {
        src[0-stride], src[1-stride], src[2-stride], src[3-stride],
        topright[0],   topright[1],   topright[2],   topright[3]
    };
    int x, y;

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int d= x + y; /* diagonal number */
            if(d < 6)
                src[x+y*stride]= (top[d] + top[d+2] + 2*top[d+1] + 2)>>2;
            else
                src[x+y*stride]= (top[6] + 3*top[7] + 2)>>2;
        }
    }
}
/**
 * 4x4 vertical-right intra prediction. Rows 0 and 2 hold 2-tap averages of
 * the top edge, rows 1 and 3 hold (1,2,1)-filtered values; rows 2 and 3 are
 * rows 0 and 1 shifted right by one pixel, with the first column filled
 * from the left edge.
 * @param src      top-left pixel of the block
 * @param topright not used by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride], t3= src[3-stride];
    const int l0= src[-1], l1= src[-1+stride], l2= src[-1+2*stride];

    /* 2-tap averages along the top edge (starting at the corner) */
    const int a0= (lt + t0 + 1)>>1;
    const int a1= (t0 + t1 + 1)>>1;
    const int a2= (t1 + t2 + 1)>>1;
    const int a3= (t2 + t3 + 1)>>1;
    /* 3-tap filtered values along the top edge (starting left of the corner) */
    const int f0= (l0 + 2*lt + t0 + 2)>>2;
    const int f1= (lt + 2*t0 + t1 + 2)>>2;
    const int f2= (t0 + 2*t1 + t2 + 2)>>2;
    const int f3= (t1 + 2*t2 + t3 + 2)>>2;

    uint8_t *r0= src;
    uint8_t *r1= src + stride;
    uint8_t *r2= src + 2*stride;
    uint8_t *r3= src + 3*stride;

    r0[0]= a0; r0[1]= a1; r0[2]= a2; r0[3]= a3;
    r1[0]= f0; r1[1]= f1; r1[2]= f2; r1[3]= f3;
    r2[0]= (lt + 2*l0 + l1 + 2)>>2;
    r2[1]= a0; r2[2]= a1; r2[3]= a2;
    r3[0]= (l0 + 2*l1 + l2 + 2)>>2;
    r3[1]= f0; r3[2]= f1; r3[3]= f2;
}
/**
 * 4x4 vertical-left intra prediction. Rows 0 and 2 hold 2-tap averages of
 * the top / top-right row, rows 1 and 3 hold (1,2,1)-filtered values; rows
 * 2 and 3 start one pixel further right along the edge than rows 0 and 1.
 * @param src      top-left pixel of the block
 * @param topright pixels continuing the row above the block to the right
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    const int top[7]= {
        src[0-stride], src[1-stride], src[2-stride], src[3-stride],
        topright[0],   topright[1],   topright[2]
    };
    int x;

    for(x=0; x<4; x++){
        src[x+0*stride]= (top[x]   + top[x+1] + 1)>>1;
        src[x+1*stride]= (top[x]   + 2*top[x+1] + top[x+2] + 2)>>2;
        src[x+2*stride]= (top[x+1] + top[x+2] + 1)>>1;
        src[x+3*stride]= (top[x+1] + 2*top[x+2] + top[x+3] + 2)>>2;
    }
}
/**
 * 4x4 horizontal-up intra prediction. The value of pixel (x,y) depends only
 * on x+2*y: it alternates between 2-tap averages and (1,2,1)-filtered values
 * walking down the left column, and saturates to the last left pixel.
 * @param src      top-left pixel of the block
 * @param topright not used by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];
    /* value for every x+2*y in 0..9 */
    const int val[10]= {
        (l0 + l1 + 1)>>1,
        (l0 + 2*l1 + l2 + 2)>>2,
        (l1 + l2 + 1)>>1,
        (l1 + 2*l2 + l3 + 2)>>2,
        (l2 + l3 + 1)>>1,
        (l2 + 2*l3 + l3 + 2)>>2,
        l3, l3, l3, l3
    };
    int x, y;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x+y*stride]= val[x + 2*y];
}
/**
 * 4x4 horizontal-down intra prediction. Each row starts with a 2-tap
 * average and a (1,2,1)-filtered value taken from the left column; the
 * remaining two pixels repeat the first two of the row above, and row 0 is
 * completed from the top edge.
 * @param src      top-left pixel of the block
 * @param topright not used by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride];
    const int l0= src[-1], l1= src[-1+stride], l2= src[-1+2*stride], l3= src[-1+3*stride];

    /* first two pixels of every row */
    const int a0= (lt + l0 + 1)>>1,  b0= (l0 + 2*lt + t0 + 2)>>2;
    const int a1= (l0 + l1 + 1)>>1,  b1= (lt + 2*l0 + l1 + 2)>>2;
    const int a2= (l1 + l2 + 1)>>1,  b2= (l0 + 2*l1 + l2 + 2)>>2;
    const int a3= (l2 + l3 + 1)>>1,  b3= (l1 + 2*l2 + l3 + 2)>>2;

    uint8_t *r0= src;
    uint8_t *r1= src + stride;
    uint8_t *r2= src + 2*stride;
    uint8_t *r3= src + 3*stride;

    r0[0]= a0; r0[1]= b0;
    r0[2]= (lt + 2*t0 + t1 + 2)>>2;
    r0[3]= (t0 + 2*t1 + t2 + 2)>>2;
    r1[0]= a1; r1[1]= b1; r1[2]= a0; r1[3]= b0;
    r2[0]= a2; r2[1]= b2; r2[2]= a1; r2[3]= b1;
    r3[0]= a3; r3[1]= b3; r3[2]= a2; r3[3]= b2;
}
/**
 * 16x16 vertical intra prediction: copies the 16 pixels above the block
 * into each of its 16 rows, four 32-bit words at a time.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *above= (const uint32_t*)(src-stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    const uint32_t w2= above[2];
    const uint32_t w3= above[3];
    int rows;

    for(rows=16; rows>0; rows--){
        uint32_t *dst= (uint32_t*)src;
        dst[0]= w0;
        dst[1]= w1;
        dst[2]= w2;
        dst[3]= w3;
        src+= stride;
    }
}
/**
 * 16x16 horizontal intra prediction: every row is filled with the pixel
 * directly to its left.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        /* unsigned multiply: 255*0x01010101 does not fit in a signed int */
        const uint32_t fill= src[-1+i*stride]*0x01010101U;
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
/**
 * 16x16 DC intra prediction: fills the block with the rounded mean of the
 * 16 pixels to the left and the 16 pixels above.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }

    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC intra prediction from the left column only: fills the block
 * with the rounded mean of the 16 pixels to its left.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC intra prediction from the top row only: fills the block with
 * the rounded mean of the 16 pixels above it.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }

    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC intra prediction without any neighbours: fills the block with
 * the constant 128.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey= 0x01010101U*128U;
    int rows, w;

    for(rows=16; rows>0; rows--){
        uint32_t *dst= (uint32_t*)src;
        for(w=0; w<4; w++)
            dst[w]= grey;
        src+= stride;
    }
}
2140 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2141 int i, j, k;
2142 int a;
2143 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2144 const uint8_t * const src0 = src+7-stride;
2145 const uint8_t *src1 = src+8*stride-1;
2146 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2147 int H = src0[1] - src0[-1];
2148 int V = src1[0] - src2[ 0];
2149 for(k=2; k<=8; ++k) {
2150 src1 += stride; src2 -= stride;
2151 H += k*(src0[k] - src0[-k]);
2152 V += k*(src1[0] - src2[ 0]);
2154 if(svq3){
2155 H = ( 5*(H/4) ) / 16;
2156 V = ( 5*(V/4) ) / 16;
2158 /* required for 100% accuracy */
2159 i = H; H = V; V = i;
2160 }else{
2161 H = ( 5*H+32 ) >> 6;
2162 V = ( 5*V+32 ) >> 6;
2165 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2166 for(j=16; j>0; --j) {
2167 int b = a;
2168 a += V;
2169 for(i=-16; i<0; i+=4) {
2170 src[16+i] = cm[ (b ) >> 5 ];
2171 src[17+i] = cm[ (b+ H) >> 5 ];
2172 src[18+i] = cm[ (b+2*H) >> 5 ];
2173 src[19+i] = cm[ (b+3*H) >> 5 ];
2174 b += 4*H;
2176 src += stride;
/**
 * 16x16 plane intra prediction with the non-SVQ3 gradient scaling.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 */
static void pred16x16_plane_c(uint8_t *src, int stride)
{
    const int svq3 = 0;

    pred16x16_plane_compat_c(src, stride, svq3);
}
/**
 * 8x8 vertical intra prediction: copies the 8 pixels above the block into
 * each of its 8 rows, two 32-bit words at a time.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t *above= (const uint32_t*)(src-stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    int rows;

    for(rows=8; rows>0; rows--){
        uint32_t *dst= (uint32_t*)src;
        dst[0]= w0;
        dst[1]= w1;
        src+= stride;
    }
}
/**
 * 8x8 horizontal intra prediction: every row is filled with the pixel
 * directly to its left.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        /* unsigned multiply: 255*0x01010101 does not fit in a signed int */
        const uint32_t fill= src[-1+i*stride]*0x01010101U;
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= fill;
    }
}
/**
 * 8x8 DC intra prediction without any neighbours: fills the block with the
 * constant 128.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey= 0x01010101U*128U;
    int rows;

    for(rows=8; rows>0; rows--){
        uint32_t *dst= (uint32_t*)src;
        dst[0]= grey;
        dst[1]= grey;
        src+= stride;
    }
}
/**
 * 8x8 DC intra prediction from the left column only. The upper four rows
 * are filled with the rounded mean of the four upper left pixels, the lower
 * four rows with the rounded mean of the four lower left pixels.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum_upper= 0, sum_lower= 0;
    uint32_t dc0, dc2;

    for(i=0;i<4; i++){
        sum_upper+= src[-1+i*stride];
        sum_lower+= src[-1+(i+4)*stride];
    }
    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc0= 0x01010101U*((sum_upper + 2)>>2);
    dc2= 0x01010101U*((sum_lower + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/**
 * 8x8 DC intra prediction from the top row only. The left four columns are
 * filled with the rounded mean of the four pixels above them, the right
 * four columns with the rounded mean of the four pixels above those.
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum_left= 0, sum_right= 0;
    uint32_t dc0, dc1;

    for(i=0;i<4; i++){
        sum_left += src[i-stride];
        sum_right+= src[4+i-stride];
    }
    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc0= 0x01010101U*((sum_left  + 2)>>2);
    dc1= 0x01010101U*((sum_right + 2)>>2);

    /* upper and lower halves receive the same values, so one loop covers all rows */
    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/**
 * 8x8 DC intra prediction, computed separately for each 4x4 quadrant:
 *  - top-left:     mean of the 4 left and 4 top pixels next to it
 *  - top-right:    mean of the 4 top pixels above it
 *  - bottom-left:  mean of the 4 left pixels next to it
 *  - bottom-right: mean of the top-right quadrant's top pixels and the
 *                  bottom-left quadrant's left pixels
 * @param src    top-left pixel of the block; rows are written as 32-bit words
 * @param stride distance in bytes between two rows
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0= 0, sum1= 0, sum2= 0;
    uint32_t dc0, dc1, dc2, dc3;

    for(i=0;i<4; i++){
        sum0+= src[-1+i*stride] + src[i-stride];
        sum1+= src[4+i-stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned arithmetic: the replicated byte overflows a signed int for means >= 128 */
    dc3= 0x01010101U*((sum1 + sum2 + 4)>>3);
    dc0= 0x01010101U*((sum0 + 4)>>3);
    dc1= 0x01010101U*((sum1 + 2)>>2);
    dc2= 0x01010101U*((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2283 static void pred8x8_plane_c(uint8_t *src, int stride){
2284 int j, k;
2285 int a;
2286 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2287 const uint8_t * const src0 = src+3-stride;
2288 const uint8_t *src1 = src+4*stride-1;
2289 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2290 int H = src0[1] - src0[-1];
2291 int V = src1[0] - src2[ 0];
2292 for(k=2; k<=4; ++k) {
2293 src1 += stride; src2 -= stride;
2294 H += k*(src0[k] - src0[-k]);
2295 V += k*(src1[0] - src2[ 0]);
2297 H = ( 17*H+16 ) >> 5;
2298 V = ( 17*V+16 ) >> 5;
2300 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2301 for(j=8; j>0; --j) {
2302 int b = a;
2303 a += V;
2304 src[0] = cm[ (b ) >> 5 ];
2305 src[1] = cm[ (b+ H) >> 5 ];
2306 src[2] = cm[ (b+2*H) >> 5 ];
2307 src[3] = cm[ (b+3*H) >> 5 ];
2308 src[4] = cm[ (b+4*H) >> 5 ];
2309 src[5] = cm[ (b+5*H) >> 5 ];
2310 src[6] = cm[ (b+6*H) >> 5 ];
2311 src[7] = cm[ (b+7*H) >> 5 ];
2312 src += stride;
/*
 * Helpers for the pred8x8l_* predictors. All of them expect `src` and
 * `stride` to be in scope; the edge loaders additionally read
 * `has_topleft` / `has_topright`.
 */

/* pixel at column x, row y relative to the top-left pixel of the block */
#define SRC(x,y) src[(x)+(y)*stride]

/* l<y>: (1,2,1)-filtered pixel of the left column at row y */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

/* l0..l7: filtered left column; the ends repeat the nearest available pixel */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* t<x>: (1,2,1)-filtered pixel of the row above at column x */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/* t0..t7: filtered top row; the ends repeat the nearest available pixel */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* same filter as PT(), but assigns to an already declared t<x> */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/* t8..t15: filtered top-right row, or copies of SRC(7,-1) when it is missing */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else { \
        t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); \
    }

/* lt: filtered top-left corner pixel */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* fills 8 rows of 8 pixels with the 32-bit pattern v; advances src */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += stride; \
    }
/**
 * 8x8 luma DC intra prediction without any neighbours: fills the block
 * with the constant 128.
 * @param src          top-left pixel of the block; rows are written as 32-bit words
 * @param has_topleft  not used by this mode
 * @param has_topright not used by this mode
 * @param stride       distance in bytes between two rows
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    const uint32_t grey = 0x80808080;
    int row;

    for( row = 0; row < 8; row++ ) {
        uint32_t *dst = (uint32_t*)(src + row*stride);
        dst[0] = grey;
        dst[1] = grey;
    }
}
2358 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2360 PREDICT_8x8_LOAD_LEFT;
2361 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2362 PREDICT_8x8_DC(dc);
2364 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2366 PREDICT_8x8_LOAD_TOP;
2367 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2368 PREDICT_8x8_DC(dc);
2370 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2372 PREDICT_8x8_LOAD_LEFT;
2373 PREDICT_8x8_LOAD_TOP;
2374 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2375 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2376 PREDICT_8x8_DC(dc);
2378 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2380 PREDICT_8x8_LOAD_LEFT;
2381 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2382 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2383 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2384 #undef ROW
2386 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2388 int y;
2389 PREDICT_8x8_LOAD_TOP;
2390 src[0] = t0;
2391 src[1] = t1;
2392 src[2] = t2;
2393 src[3] = t3;
2394 src[4] = t4;
2395 src[5] = t5;
2396 src[6] = t6;
2397 src[7] = t7;
2398 for( y = 1; y < 8; y++ )
2399 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2401 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2403 PREDICT_8x8_LOAD_TOP;
2404 PREDICT_8x8_LOAD_TOPRIGHT;
2405 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2406 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2407 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2408 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2409 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2410 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2411 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2412 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2413 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2414 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2415 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2416 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2417 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2418 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2419 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2421 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2423 PREDICT_8x8_LOAD_TOP;
2424 PREDICT_8x8_LOAD_LEFT;
2425 PREDICT_8x8_LOAD_TOPLEFT;
2426 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2427 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2428 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2429 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2430 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2431 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2432 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2433 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2434 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2435 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2436 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2437 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2438 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2439 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2440 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2443 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2445 PREDICT_8x8_LOAD_TOP;
2446 PREDICT_8x8_LOAD_LEFT;
2447 PREDICT_8x8_LOAD_TOPLEFT;
2448 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2449 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2450 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2451 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2452 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2453 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2454 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2455 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2456 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2457 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2458 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2459 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2460 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2461 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2462 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2463 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2464 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2465 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2466 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2467 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2468 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2469 SRC(7,0)= (t6 + t7 + 1) >> 1;
2471 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2473 PREDICT_8x8_LOAD_TOP;
2474 PREDICT_8x8_LOAD_LEFT;
2475 PREDICT_8x8_LOAD_TOPLEFT;
2476 SRC(0,7)= (l6 + l7 + 1) >> 1;
2477 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2478 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2479 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2480 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2481 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2482 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2483 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2484 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2485 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2486 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2487 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2488 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2489 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2490 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2491 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2492 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2493 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2494 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2495 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2496 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2497 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2499 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2501 PREDICT_8x8_LOAD_TOP;
2502 PREDICT_8x8_LOAD_TOPRIGHT;
2503 SRC(0,0)= (t0 + t1 + 1) >> 1;
2504 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2505 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2506 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2507 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2508 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2509 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2510 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2511 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2512 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2513 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2514 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2515 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2516 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2517 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2518 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2519 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2520 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2521 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2522 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2523 SRC(7,6)= (t10 + t11 + 1) >> 1;
2524 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2526 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2528 PREDICT_8x8_LOAD_LEFT;
2529 SRC(0,0)= (l0 + l1 + 1) >> 1;
2530 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2531 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2532 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2533 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2534 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2535 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2536 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2537 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2538 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2539 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2540 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2541 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2542 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2543 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2544 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2545 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2546 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2548 #undef PREDICT_8x8_LOAD_LEFT
2549 #undef PREDICT_8x8_LOAD_TOP
2550 #undef PREDICT_8x8_LOAD_TOPLEFT
2551 #undef PREDICT_8x8_LOAD_TOPRIGHT
2552 #undef PREDICT_8x8_DC
2553 #undef PTR
2554 #undef PT
2555 #undef PL
2556 #undef SRC
2558 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2559 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2560 int src_x_offset, int src_y_offset,
2561 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2562 MpegEncContext * const s = &h->s;
2563 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2564 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2565 const int luma_xy= (mx&3) + ((my&3)<<2);
2566 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2567 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2568 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2569 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2570 int extra_height= extra_width;
2571 int emu=0;
2572 const int full_mx= mx>>2;
2573 const int full_my= my>>2;
2575 assert(pic->data[0]);
2577 if(mx&7) extra_width -= 3;
2578 if(my&7) extra_height -= 3;
2580 if( full_mx < 0-extra_width
2581 || full_my < 0-extra_height
2582 || full_mx + 16/*FIXME*/ > s->width + extra_width
2583 || full_my + 16/*FIXME*/ > s->height + extra_height){
2584 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
2585 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2586 emu=1;
2589 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2590 if(!square){
2591 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2594 if(s->flags&CODEC_FLAG_GRAY) return;
2596 if(emu){
2597 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2598 src_cb= s->edge_emu_buffer;
2600 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2602 if(emu){
2603 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2604 src_cr= s->edge_emu_buffer;
2606 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
2609 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2610 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2611 int x_offset, int y_offset,
2612 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2613 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2614 int list0, int list1){
2615 MpegEncContext * const s = &h->s;
2616 qpel_mc_func *qpix_op= qpix_put;
2617 h264_chroma_mc_func chroma_op= chroma_put;
2619 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2620 dest_cb += x_offset + y_offset*s->uvlinesize;
2621 dest_cr += x_offset + y_offset*s->uvlinesize;
2622 x_offset += 8*s->mb_x;
2623 y_offset += 8*s->mb_y;
2625 if(list0){
2626 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2627 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2628 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2629 qpix_op, chroma_op);
2631 qpix_op= qpix_avg;
2632 chroma_op= chroma_avg;
2635 if(list1){
2636 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2637 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2638 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2639 qpix_op, chroma_op);
2643 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2644 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2645 int x_offset, int y_offset,
2646 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2647 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2648 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2649 int list0, int list1){
2650 MpegEncContext * const s = &h->s;
2652 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2653 dest_cb += x_offset + y_offset*s->uvlinesize;
2654 dest_cr += x_offset + y_offset*s->uvlinesize;
2655 x_offset += 8*s->mb_x;
2656 y_offset += 8*s->mb_y;
2658 if(list0 && list1){
2659 /* don't optimize for luma-only case, since B-frames usually
2660 * use implicit weights => chroma too. */
2661 uint8_t *tmp_cb = s->obmc_scratchpad;
2662 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2663 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2664 int refn0 = h->ref_cache[0][ scan8[n] ];
2665 int refn1 = h->ref_cache[1][ scan8[n] ];
2667 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2668 dest_y, dest_cb, dest_cr,
2669 x_offset, y_offset, qpix_put, chroma_put);
2670 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2671 tmp_y, tmp_cb, tmp_cr,
2672 x_offset, y_offset, qpix_put, chroma_put);
2674 if(h->use_weight == 2){
2675 int weight0 = h->implicit_weight[refn0][refn1];
2676 int weight1 = 64 - weight0;
2677 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2678 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2679 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
2680 }else{
2681 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2682 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2683 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2684 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2685 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2686 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2687 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2688 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2689 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
2691 }else{
2692 int list = list1 ? 1 : 0;
2693 int refn = h->ref_cache[list][ scan8[n] ];
2694 Picture *ref= &h->ref_list[list][refn];
2695 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2696 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2697 qpix_put, chroma_put);
2699 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2700 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2701 if(h->use_weight_chroma){
2702 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2703 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2704 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2705 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2710 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2711 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2712 int x_offset, int y_offset,
2713 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2714 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2715 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2716 int list0, int list1){
2717 if((h->use_weight==2 && list0 && list1
2718 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2719 || h->use_weight==1)
2720 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2721 x_offset, y_offset, qpix_put, chroma_put,
2722 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2723 else
2724 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2725 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2728 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2729 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2730 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2731 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2732 MpegEncContext * const s = &h->s;
2733 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2734 const int mb_type= s->current_picture.mb_type[mb_xy];
2736 assert(IS_INTER(mb_type));
2738 if(IS_16X16(mb_type)){
2739 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2740 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2741 &weight_op[0], &weight_avg[0],
2742 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2743 }else if(IS_16X8(mb_type)){
2744 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2745 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2746 &weight_op[1], &weight_avg[1],
2747 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2748 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2749 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2750 &weight_op[1], &weight_avg[1],
2751 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2752 }else if(IS_8X16(mb_type)){
2753 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2754 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2755 &weight_op[2], &weight_avg[2],
2756 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2757 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2758 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2759 &weight_op[2], &weight_avg[2],
2760 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2761 }else{
2762 int i;
2764 assert(IS_8X8(mb_type));
2766 for(i=0; i<4; i++){
2767 const int sub_mb_type= h->sub_mb_type[i];
2768 const int n= 4*i;
2769 int x_offset= (i&1)<<2;
2770 int y_offset= (i&2)<<1;
2772 if(IS_SUB_8X8(sub_mb_type)){
2773 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2774 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2775 &weight_op[3], &weight_avg[3],
2776 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2777 }else if(IS_SUB_8X4(sub_mb_type)){
2778 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2779 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2780 &weight_op[4], &weight_avg[4],
2781 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2782 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2783 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2784 &weight_op[4], &weight_avg[4],
2785 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2786 }else if(IS_SUB_4X8(sub_mb_type)){
2787 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2788 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2789 &weight_op[5], &weight_avg[5],
2790 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2791 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2792 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2793 &weight_op[5], &weight_avg[5],
2794 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2795 }else{
2796 int j;
2797 assert(IS_SUB_4X4(sub_mb_type));
2798 for(j=0; j<4; j++){
2799 int sub_x_offset= x_offset + 2*(j&1);
2800 int sub_y_offset= y_offset + (j&2);
2801 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2802 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2803 &weight_op[6], &weight_avg[6],
2804 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2811 static void decode_init_vlc(H264Context *h){
2812 static int done = 0;
2814 if (!done) {
2815 int i;
2816 done = 1;
2818 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2819 &chroma_dc_coeff_token_len [0], 1, 1,
2820 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2822 for(i=0; i<4; i++){
2823 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2824 &coeff_token_len [i][0], 1, 1,
2825 &coeff_token_bits[i][0], 1, 1, 1);
2828 for(i=0; i<3; i++){
2829 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2830 &chroma_dc_total_zeros_len [i][0], 1, 1,
2831 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2833 for(i=0; i<15; i++){
2834 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2835 &total_zeros_len [i][0], 1, 1,
2836 &total_zeros_bits[i][0], 1, 1, 1);
2839 for(i=0; i<6; i++){
2840 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2841 &run_len [i][0], 1, 1,
2842 &run_bits[i][0], 1, 1, 1);
2844 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2845 &run_len [6][0], 1, 1,
2846 &run_bits[6][0], 1, 1, 1);
2851 * Sets the intra prediction function pointers.
2853 static void init_pred_ptrs(H264Context *h){
2854 // MpegEncContext * const s = &h->s;
2856 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2857 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2858 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2859 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2860 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2861 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2862 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2863 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2864 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2865 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2866 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2867 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2869 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2870 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2871 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2872 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2873 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2874 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2875 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2876 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2877 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2878 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2879 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2880 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2882 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2883 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2884 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2885 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2886 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2887 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2888 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2890 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2891 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2892 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2893 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2894 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2895 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2896 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2899 static void free_tables(H264Context *h){
2900 av_freep(&h->intra4x4_pred_mode);
2901 av_freep(&h->chroma_pred_mode_table);
2902 av_freep(&h->cbp_table);
2903 av_freep(&h->mvd_table[0]);
2904 av_freep(&h->mvd_table[1]);
2905 av_freep(&h->direct_table);
2906 av_freep(&h->non_zero_count);
2907 av_freep(&h->slice_table_base);
2908 av_freep(&h->top_borders[1]);
2909 av_freep(&h->top_borders[0]);
2910 h->slice_table= NULL;
2912 av_freep(&h->mb2b_xy);
2913 av_freep(&h->mb2b8_xy);
2915 av_freep(&h->dequant4_coeff);
2916 av_freep(&h->dequant8_coeff);
2918 av_freep(&h->s.obmc_scratchpad);
2922 * allocates tables.
2923 * needs width/height
2925 static int alloc_tables(H264Context *h){
2926 MpegEncContext * const s = &h->s;
2927 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2928 int x,y,q;
2930 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2932 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2933 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
2934 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2935 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2936 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2938 if( h->pps.cabac ) {
2939 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2940 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2941 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2942 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2945 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
2946 h->slice_table= h->slice_table_base + s->mb_stride + 1;
2948 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2949 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2950 for(y=0; y<s->mb_height; y++){
2951 for(x=0; x<s->mb_width; x++){
2952 const int mb_xy= x + y*s->mb_stride;
2953 const int b_xy = 4*x + 4*y*h->b_stride;
2954 const int b8_xy= 2*x + 2*y*h->b8_stride;
2956 h->mb2b_xy [mb_xy]= b_xy;
2957 h->mb2b8_xy[mb_xy]= b8_xy;
2961 CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
2962 CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
2963 memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
2964 for(q=0; q<52; q++){
2965 int shift = div6[q];
2966 int idx = rem6[q];
2967 if(shift >= 2) // qp<12 are shifted during dequant
2968 shift -= 2;
2969 for(x=0; x<64; x++)
2970 h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
2971 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;
2973 if(h->sps.transform_bypass){
2974 for(x=0; x<16; x++)
2975 h->dequant4_coeff[0][x] = 1;
2976 for(x=0; x<64; x++)
2977 h->dequant8_coeff[0][x] = 1<<2;
2980 s->obmc_scratchpad = NULL;
2982 return 0;
2983 fail:
2984 free_tables(h);
2985 return -1;
2988 static void common_init(H264Context *h){
2989 MpegEncContext * const s = &h->s;
2991 s->width = s->avctx->width;
2992 s->height = s->avctx->height;
2993 s->codec_id= s->avctx->codec->id;
2995 init_pred_ptrs(h);
2997 s->unrestricted_mv=1;
2998 s->decode=1; //FIXME
3001 static int decode_init(AVCodecContext *avctx){
3002 H264Context *h= avctx->priv_data;
3003 MpegEncContext * const s = &h->s;
3005 MPV_decode_defaults(s);
3007 s->avctx = avctx;
3008 common_init(h);
3010 s->out_format = FMT_H264;
3011 s->workaround_bugs= avctx->workaround_bugs;
3013 // set defaults
3014 // s->decode_mb= ff_h263_decode_mb;
3015 s->low_delay= 1;
3016 avctx->pix_fmt= PIX_FMT_YUV420P;
3018 decode_init_vlc(h);
3020 if(avctx->extradata_size > 0 && avctx->extradata &&
3021 *(char *)avctx->extradata == 1){
3022 h->is_avc = 1;
3023 h->got_avcC = 0;
3024 } else {
3025 h->is_avc = 0;
3028 return 0;
3031 static void frame_start(H264Context *h){
3032 MpegEncContext * const s = &h->s;
3033 int i;
3035 MPV_frame_start(s, s->avctx);
3036 ff_er_frame_start(s);
3038 assert(s->linesize && s->uvlinesize);
3040 for(i=0; i<16; i++){
3041 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3042 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3044 for(i=0; i<4; i++){
3045 h->block_offset[16+i]=
3046 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3047 h->block_offset[24+16+i]=
3048 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3051 /* can't be in alloc_tables because linesize isn't known there.
3052 * FIXME: redo bipred weight to not require extra buffer? */
3053 if(!s->obmc_scratchpad)
3054 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3056 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3059 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3060 MpegEncContext * const s = &h->s;
3061 int i;
3063 src_y -= linesize;
3064 src_cb -= uvlinesize;
3065 src_cr -= uvlinesize;
3067 // There are two lines saved, the line above the the top macroblock of a pair,
3068 // and the line above the bottom macroblock
3069 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3070 for(i=1; i<17; i++){
3071 h->left_border[i]= src_y[15+i* linesize];
3074 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3075 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3077 if(!(s->flags&CODEC_FLAG_GRAY)){
3078 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3079 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3080 for(i=1; i<9; i++){
3081 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3082 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3084 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3085 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchanges (or copies) the saved border samples in h->left_border and
 * h->top_borders[0] with the picture samples to the left of and above one
 * macroblock.
 * @param src_y,src_cb,src_cr top-left sample of the macroblock in each plane
 * @param xchg forwarded to XCHG() for the left column and the first 8 top
 *             luma bytes; the remaining top-row accesses always pass 1
 */
3089 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3090 MpegEncContext * const s = &h->s;
3091 int temp8, i;
3092 uint64_t temp64;
/* the left column / top row are only touched when the macroblock is not in
 * the first column / first row of the picture */
3093 int deblock_left = (s->mb_x > 0);
3094 int deblock_top = (s->mb_y > 0);
/* move each pointer one line up and one sample left, i.e. to the top-left
 * neighbour of the macroblock */
3096 src_y -= linesize + 1;
3097 src_cb -= uvlinesize + 1;
3098 src_cr -= uvlinesize + 1;
/* XCHG(a,b,t,xchg): b always receives the old value of a; a receives the old
 * value of b only when xchg is non-zero. t is scratch storage. */
3100 #define XCHG(a,b,t,xchg)\
3101 t= a;\
3102 if(xchg)\
3103 a= b;\
3104 b= t;
/* luma left column; row 0 (the corner) is skipped when there is no row above */
3106 if(deblock_left){
3107 for(i = !deblock_top; i<17; i++){
3108 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* luma top row: 16 bytes above this macroblock plus, when a right neighbour
 * exists, the first 8 saved bytes of the next macroblock column */
3112 if(deblock_top){
3113 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3114 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3115 if(s->mb_x+1 < s->mb_width){
3116 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
/* chroma borders, skipped when CODEC_FLAG_GRAY is set */
3120 if(!(s->flags&CODEC_FLAG_GRAY)){
3121 if(deblock_left){
3122 for(i = !deblock_top; i<9; i++){
3123 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3124 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3127 if(deblock_top){
3128 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3129 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3134 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3135 MpegEncContext * const s = &h->s;
3136 int i;
3138 src_y -= 2 * linesize;
3139 src_cb -= 2 * uvlinesize;
3140 src_cr -= 2 * uvlinesize;
3142 // There are two lines saved, the line above the the top macroblock of a pair,
3143 // and the line above the bottom macroblock
3144 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3145 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3146 for(i=2; i<34; i++){
3147 h->left_border[i]= src_y[15+i* linesize];
3150 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3151 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3152 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3153 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3155 if(!(s->flags&CODEC_FLAG_GRAY)){
3156 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3157 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3158 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3159 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3160 for(i=2; i<18; i++){
3161 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3162 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3164 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3165 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3166 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3167 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * Macroblock-pair variant of xchg_mb_border(): exchanges (or copies) the
 * saved border samples in h->left_border and h->top_borders[0..1] with the
 * picture samples to the left of and in the two lines above a pair.
 * @param src_y,src_cb,src_cr top-left sample of the pair in each plane
 * @param xchg forwarded to XCHG() for the left column and the first 8 top
 *             luma bytes of each line; the other top-row accesses pass 1
 */
3171 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3172 MpegEncContext * const s = &h->s;
3173 int temp8, i;
3174 uint64_t temp64;
/* the left column / top rows are only touched when the pair is not in the
 * first column / first row of the picture */
3175 int deblock_left = (s->mb_x > 0);
3176 int deblock_top = (s->mb_y > 0);
3178 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* move each pointer two lines up and one sample left */
3180 src_y -= 2 * linesize + 1;
3181 src_cb -= 2 * uvlinesize + 1;
3182 src_cr -= 2 * uvlinesize + 1;
/* XCHG(a,b,t,xchg): b always receives the old value of a; a receives the old
 * value of b only when xchg is non-zero. t is scratch storage. */
3184 #define XCHG(a,b,t,xchg)\
3185 t= a;\
3186 if(xchg)\
3187 a= b;\
3188 b= t;
/* luma left column; the first two rows are skipped when there is no row above */
3190 if(deblock_left){
3191 for(i = (!deblock_top)<<1; i<34; i++){
3192 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* luma: the two lines above the pair, one per top_borders set */
3196 if(deblock_top){
3197 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3198 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3199 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3200 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* chroma borders, skipped when CODEC_FLAG_GRAY is set */
3203 if(!(s->flags&CODEC_FLAG_GRAY)){
3204 if(deblock_left){
3205 for(i = (!deblock_top) << 1; i<18; i++){
3206 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3207 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3210 if(deblock_top){
3211 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3212 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3213 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3214 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstructs the macroblock at (s->mb_x, s->mb_y) into s->current_picture.
 * Intra-PCM macroblocks are copied from h->mb; otherwise intra prediction or
 * hl_motion() produces the prediction, the residual in h->mb is added through
 * idct_add / svq3_add_idct_c, and filter_mb() is run at the end when
 * h->deblocking_filter is set.
 * Returns without doing anything when s->decode is zero.
 */
3219 static void hl_decode_mb(H264Context *h){
3220 MpegEncContext * const s = &h->s;
3221 const int mb_x= s->mb_x;
3222 const int mb_y= s->mb_y;
3223 const int mb_xy= mb_x + mb_y*s->mb_stride;
3224 const int mb_type= s->current_picture.mb_type[mb_xy];
3225 uint8_t *dest_y, *dest_cb, *dest_cr;
3226 int linesize, uvlinesize /*dct_offset*/;
3227 int i;
3228 int *block_offset = &h->block_offset[0];
/* bottom: set for odd macroblock rows (second macroblock of a pair) */
3229 const unsigned int bottom = mb_y & 1;
3230 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3231 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3233 if(!s->decode)
3234 return;
/* top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma) */
3236 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3237 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3238 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* field macroblocks use doubled strides and the second block_offset set;
 * for odd rows the destination is moved up by 15 luma / 7 chroma lines */
3240 if (h->mb_field_decoding_flag) {
3241 linesize = s->linesize * 2;
3242 uvlinesize = s->uvlinesize * 2;
3243 block_offset = &h->block_offset[24];
3244 if(mb_y&1){ //FIXME move out of this func?
3245 dest_y -= s->linesize*15;
3246 dest_cb-= s->uvlinesize*7;
3247 dest_cr-= s->uvlinesize*7;
3249 } else {
3250 linesize = s->linesize;
3251 uvlinesize = s->uvlinesize;
3252 // dct_offset = s->linesize * 16;
/* residual add function: plain pixel add with transform bypass, IDCT
 * otherwise; 8x8 or 4x4 variant depending on IS_8x8DCT(mb_type) */
3255 idct_add = transform_bypass
3256 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3257 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
3259 if (IS_INTRA_PCM(mb_type)) {
3260 unsigned int x, y;
3262 // The pixels are stored in h->mb array in the same order as levels,
3263 // copy them in output in the correct order.
3264 for(i=0; i<16; i++) {
3265 for (y=0; y<4; y++) {
3266 for (x=0; x<4; x++) {
3267 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3271 for(i=16; i<16+4; i++) {
3272 for (y=0; y<4; y++) {
3273 for (x=0; x<4; x++) {
3274 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3278 for(i=20; i<20+4; i++) {
3279 for (y=0; y<4; y++) {
3280 for (x=0; x<4; x++) {
3281 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3285 } else {
3286 if(IS_INTRA(mb_type)){
/* with deblocking enabled, exchange the picture border with the saved
 * border samples before predicting (xchg=1); the matching call with xchg=0
 * follows after the luma prediction below */
3287 if(h->deblocking_filter) {
3288 if (h->mb_aff_frame) {
3289 if (!bottom)
3290 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3291 } else {
3292 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
/* chroma intra prediction, same mode for both planes */
3296 if(!(s->flags&CODEC_FLAG_GRAY)){
3297 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3298 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3301 if(IS_INTRA4x4(mb_type)){
3302 if(!s->encoding){
/* 8x8 transform: four 8x8 blocks (i = 0,4,8,12), predicted then residual added */
3303 if(IS_8x8DCT(mb_type)){
3304 for(i=0; i<16; i+=4){
3305 uint8_t * const ptr= dest_y + block_offset[i];
3306 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3307 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3308 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3309 if(h->non_zero_count_cache[ scan8[i] ])
3310 idct_add(ptr, h->mb + i*16, linesize);
3312 }else
/* 4x4 transform: sixteen 4x4 blocks */
3313 for(i=0; i<16; i++){
3314 uint8_t * const ptr= dest_y + block_offset[i];
3315 uint8_t *topright;
3316 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3317 int tr;
/* only these two modes get a top-right pointer; when the top-right samples
 * are unavailable, the sample above the last column is replicated 4 times */
3319 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3320 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3321 assert(mb_y || linesize <= block_offset[i]);
3322 if(!topright_avail){
3323 tr= ptr[3 - linesize]*0x01010101;
3324 topright= (uint8_t*) &tr;
3325 }else
3326 topright= ptr + 4 - linesize;
3327 }else
3328 topright= NULL;
3330 h->pred4x4[ dir ](ptr, topright, linesize);
3331 if(h->non_zero_count_cache[ scan8[i] ]){
3332 if(s->codec_id == CODEC_ID_H264)
3333 idct_add(ptr, h->mb + i*16, linesize);
3334 else
3335 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
3339 }else{
/* intra 16x16: predict the whole luma block, then run the luma DC
 * dequant/transform (not done for H.264 with transform bypass) */
3340 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3341 if(s->codec_id == CODEC_ID_H264){
3342 if(!transform_bypass)
3343 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
3344 }else
3345 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* second border call with xchg=0; for MBAFF it is issued from the bottom
 * macroblock on the pair's top position, with s->mb_y temporarily decremented */
3347 if(h->deblocking_filter) {
3348 if (h->mb_aff_frame) {
3349 if (bottom) {
3350 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3351 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3352 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3353 s->mb_y--;
3354 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3355 s->mb_y++;
3357 } else {
3358 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* non-intra H.264 macroblocks: motion compensation */
3361 }else if(s->codec_id == CODEC_ID_H264){
3362 hl_motion(h, dest_y, dest_cb, dest_cr,
3363 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3364 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3365 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except intra 4x4 (already added above) */
3369 if(!IS_INTRA4x4(mb_type)){
3370 if(s->codec_id == CODEC_ID_H264){
3371 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3372 for(i=0; i<16; i+=di){
3373 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3374 uint8_t * const ptr= dest_y + block_offset[i];
3375 idct_add(ptr, h->mb + i*16, linesize);
3378 }else{
3379 for(i=0; i<16; i++){
3380 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3381 uint8_t * const ptr= dest_y + block_offset[i];
3382 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: always the 4x4 add function; blocks 16..19 go to cb,
 * blocks 20..23 to cr */
3388 if(!(s->flags&CODEC_FLAG_GRAY)){
3389 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3390 if(!transform_bypass){
3391 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
3392 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
3394 if(s->codec_id == CODEC_ID_H264){
3395 for(i=16; i<16+4; i++){
3396 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3397 uint8_t * const ptr= dest_cb + block_offset[i];
3398 idct_add(ptr, h->mb + i*16, uvlinesize);
3401 for(i=20; i<20+4; i++){
3402 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3403 uint8_t * const ptr= dest_cr + block_offset[i];
3404 idct_add(ptr, h->mb + i*16, uvlinesize);
3407 }else{
3408 for(i=16; i<16+4; i++){
3409 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3410 uint8_t * const ptr= dest_cb + block_offset[i];
3411 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3414 for(i=20; i<20+4; i++){
3415 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3416 uint8_t * const ptr= dest_cr + block_offset[i];
3417 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking; runs for intra-PCM macroblocks as well */
3423 if(h->deblocking_filter) {
3424 if (h->mb_aff_frame) {
/* note: mb_y and mb_xy below shadow the outer variables and refer to the
 * top macroblock of the pair */
3425 const int mb_y = s->mb_y - 1;
3426 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3427 const int mb_xy= mb_x + mb_y*s->mb_stride;
3428 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3429 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* tmp is only compared below to emit trace messages when this one chroma
 * sample changes */
3430 uint8_t tmp = s->current_picture.data[1][384];
/* a pair is filtered only once, when its bottom macroblock is decoded */
3431 if (!bottom) return;
3432 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3433 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3434 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3436 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3437 // TODO deblock a pair
3438 // top
3439 s->mb_y--;
3440 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3441 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3442 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3443 if (tmp != s->current_picture.data[1][384]) {
3444 tprintf("modified pixel 8,1 (1)\n");
3446 // bottom
3447 s->mb_y++;
3448 tprintf("call mbaff filter_mb\n");
3449 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3450 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3451 if (tmp != s->current_picture.data[1][384]) {
3452 tprintf("modified pixel 8,1 (2)\n");
3454 } else {
/* non-MBAFF: save the unfiltered border, refill the caches, then filter */
3455 tprintf("call filter_mb\n");
3456 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3457 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3458 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3464 * fills the default_ref_list.
3466 static int fill_default_ref_list(H264Context *h){
3467 MpegEncContext * const s = &h->s;
3468 int i;
3469 int smallest_poc_greater_than_current = -1;
3470 Picture sorted_short_ref[32];
3472 if(h->slice_type==B_TYPE){
3473 int out_i;
3474 int limit= INT_MIN;
3476 /* sort frame according to poc in B slice */
3477 for(out_i=0; out_i<h->short_ref_count; out_i++){
3478 int best_i=INT_MIN;
3479 int best_poc=INT_MAX;
3481 for(i=0; i<h->short_ref_count; i++){
3482 const int poc= h->short_ref[i]->poc;
3483 if(poc > limit && poc < best_poc){
3484 best_poc= poc;
3485 best_i= i;
3489 assert(best_i != INT_MIN);
3491 limit= best_poc;
3492 sorted_short_ref[out_i]= *h->short_ref[best_i];
3493 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3494 if (-1 == smallest_poc_greater_than_current) {
3495 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3496 smallest_poc_greater_than_current = out_i;
3502 if(s->picture_structure == PICT_FRAME){
3503 if(h->slice_type==B_TYPE){
3504 int list;
3505 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3507 // find the largest poc
3508 for(list=0; list<2; list++){
3509 int index = 0;
3510 int j= -99;
3511 int step= list ? -1 : 1;
3513 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3514 while(j<0 || j>= h->short_ref_count){
3515 if(j != -99 && step == (list ? -1 : 1))
3516 return -1;
3517 step = -step;
3518 j= smallest_poc_greater_than_current + (step>>1);
3520 if(sorted_short_ref[j].reference != 3) continue;
3521 h->default_ref_list[list][index ]= sorted_short_ref[j];
3522 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3525 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3526 if(h->long_ref[i] == NULL) continue;
3527 if(h->long_ref[i]->reference != 3) continue;
3529 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3530 h->default_ref_list[ list ][index++].pic_id= i;;
3533 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3534 // swap the two first elements of L1 when
3535 // L0 and L1 are identical
3536 Picture temp= h->default_ref_list[1][0];
3537 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3538 h->default_ref_list[1][1] = temp;
3541 if(index < h->ref_count[ list ])
3542 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
3544 }else{
3545 int index=0;
3546 for(i=0; i<h->short_ref_count; i++){
3547 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3548 h->default_ref_list[0][index ]= *h->short_ref[i];
3549 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3551 for(i = 0; i < 16; i++){
3552 if(h->long_ref[i] == NULL) continue;
3553 if(h->long_ref[i]->reference != 3) continue;
3554 h->default_ref_list[0][index ]= *h->long_ref[i];
3555 h->default_ref_list[0][index++].pic_id= i;;
3557 if(index < h->ref_count[0])
3558 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3560 }else{ //FIELD
3561 if(h->slice_type==B_TYPE){
3562 }else{
3563 //FIXME second field balh
3566 #ifdef TRACE
3567 for (i=0; i<h->ref_count[0]; i++) {
3568 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3570 if(h->slice_type==B_TYPE){
3571 for (i=0; i<h->ref_count[1]; i++) {
3572 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3575 #endif
3576 return 0;
3579 static void print_short_term(H264Context *h);
3580 static void print_long_term(H264Context *h);
3582 static int decode_ref_pic_list_reordering(H264Context *h){
3583 MpegEncContext * const s = &h->s;
3584 int list, index;
3586 print_short_term(h);
3587 print_long_term(h);
3588 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3590 for(list=0; list<2; list++){
3591 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3593 if(get_bits1(&s->gb)){
3594 int pred= h->curr_pic_num;
3596 for(index=0; ; index++){
3597 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3598 int pic_id;
3599 int i;
3600 Picture *ref = NULL;
3602 if(reordering_of_pic_nums_idc==3)
3603 break;
3605 if(index >= h->ref_count[list]){
3606 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3607 return -1;
3610 if(reordering_of_pic_nums_idc<3){
3611 if(reordering_of_pic_nums_idc<2){
3612 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3614 if(abs_diff_pic_num >= h->max_pic_num){
3615 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3616 return -1;
3619 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3620 else pred+= abs_diff_pic_num;
3621 pred &= h->max_pic_num - 1;
3623 for(i= h->short_ref_count-1; i>=0; i--){
3624 ref = h->short_ref[i];
3625 assert(ref->reference == 3);
3626 assert(!ref->long_ref);
3627 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3628 break;
3630 if(i>=0)
3631 ref->pic_id= ref->frame_num;
3632 }else{
3633 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3634 ref = h->long_ref[pic_id];
3635 ref->pic_id= pic_id;
3636 assert(ref->reference == 3);
3637 assert(ref->long_ref);
3638 i=0;
3641 if (i < 0) {
3642 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3643 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
3644 } else {
3645 for(i=index; i+1<h->ref_count[list]; i++){
3646 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3647 break;
3649 for(; i > index; i--){
3650 h->ref_list[list][i]= h->ref_list[list][i-1];
3652 h->ref_list[list][index]= *ref;
3654 }else{
3655 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3656 return -1;
3661 if(h->slice_type!=B_TYPE) break;
3663 for(list=0; list<2; list++){
3664 for(index= 0; index < h->ref_count[list]; index++){
3665 if(!h->ref_list[list][index].data[0])
3666 h->ref_list[list][index]= s->current_picture;
3668 if(h->slice_type!=B_TYPE) break;
3671 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3672 direct_dist_scale_factor(h);
3673 direct_ref_list_init(h);
3674 return 0;
3677 static int pred_weight_table(H264Context *h){
3678 MpegEncContext * const s = &h->s;
3679 int list, i;
3680 int luma_def, chroma_def;
3682 h->use_weight= 0;
3683 h->use_weight_chroma= 0;
3684 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3685 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3686 luma_def = 1<<h->luma_log2_weight_denom;
3687 chroma_def = 1<<h->chroma_log2_weight_denom;
3689 for(list=0; list<2; list++){
3690 for(i=0; i<h->ref_count[list]; i++){
3691 int luma_weight_flag, chroma_weight_flag;
3693 luma_weight_flag= get_bits1(&s->gb);
3694 if(luma_weight_flag){
3695 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3696 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3697 if( h->luma_weight[list][i] != luma_def
3698 || h->luma_offset[list][i] != 0)
3699 h->use_weight= 1;
3700 }else{
3701 h->luma_weight[list][i]= luma_def;
3702 h->luma_offset[list][i]= 0;
3705 chroma_weight_flag= get_bits1(&s->gb);
3706 if(chroma_weight_flag){
3707 int j;
3708 for(j=0; j<2; j++){
3709 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3710 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3711 if( h->chroma_weight[list][i][j] != chroma_def
3712 || h->chroma_offset[list][i][j] != 0)
3713 h->use_weight_chroma= 1;
3715 }else{
3716 int j;
3717 for(j=0; j<2; j++){
3718 h->chroma_weight[list][i][j]= chroma_def;
3719 h->chroma_offset[list][i][j]= 0;
3723 if(h->slice_type != B_TYPE) break;
3725 h->use_weight= h->use_weight || h->use_weight_chroma;
3726 return 0;
3729 static void implicit_weight_table(H264Context *h){
3730 MpegEncContext * const s = &h->s;
3731 int ref0, ref1;
3732 int cur_poc = s->current_picture_ptr->poc;
3734 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3735 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3736 h->use_weight= 0;
3737 h->use_weight_chroma= 0;
3738 return;
3741 h->use_weight= 2;
3742 h->use_weight_chroma= 2;
3743 h->luma_log2_weight_denom= 5;
3744 h->chroma_log2_weight_denom= 5;
3746 /* FIXME: MBAFF */
3747 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3748 int poc0 = h->ref_list[0][ref0].poc;
3749 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3750 int poc1 = h->ref_list[1][ref1].poc;
3751 int td = clip(poc1 - poc0, -128, 127);
3752 if(td){
3753 int tb = clip(cur_poc - poc0, -128, 127);
3754 int tx = (16384 + (ABS(td) >> 1)) / td;
3755 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3756 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3757 h->implicit_weight[ref0][ref1] = 32;
3758 else
3759 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3760 }else
3761 h->implicit_weight[ref0][ref1] = 32;
3766 static inline void unreference_pic(H264Context *h, Picture *pic){
3767 int i;
3768 pic->reference=0;
3769 if(pic == h->delayed_output_pic)
3770 pic->reference=1;
3771 else{
3772 for(i = 0; h->delayed_pic[i]; i++)
3773 if(pic == h->delayed_pic[i]){
3774 pic->reference=1;
3775 break;
3781 * instantaneous decoder refresh.
3783 static void idr(H264Context *h){
3784 int i;
3786 for(i=0; i<16; i++){
3787 if (h->long_ref[i] != NULL) {
3788 unreference_pic(h, h->long_ref[i]);
3789 h->long_ref[i]= NULL;
3792 h->long_ref_count=0;
3794 for(i=0; i<h->short_ref_count; i++){
3795 unreference_pic(h, h->short_ref[i]);
3796 h->short_ref[i]= NULL;
3798 h->short_ref_count=0;
3801 /* forget old pics after a seek */
3802 static void flush_dpb(AVCodecContext *avctx){
3803 H264Context *h= avctx->priv_data;
3804 int i;
3805 for(i=0; i<16; i++)
3806 h->delayed_pic[i]= NULL;
3807 h->delayed_output_pic= NULL;
3808 idr(h);
3809 if(h->s.current_picture_ptr)
3810 h->s.current_picture_ptr->reference= 0;
3815 * @return the removed picture or NULL if an error occurs
3817 static Picture * remove_short(H264Context *h, int frame_num){
3818 MpegEncContext * const s = &h->s;
3819 int i;
3821 if(s->avctx->debug&FF_DEBUG_MMCO)
3822 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3824 for(i=0; i<h->short_ref_count; i++){
3825 Picture *pic= h->short_ref[i];
3826 if(s->avctx->debug&FF_DEBUG_MMCO)
3827 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3828 if(pic->frame_num == frame_num){
3829 h->short_ref[i]= NULL;
3830 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3831 h->short_ref_count--;
3832 return pic;
3835 return NULL;
3840 * @return the removed picture or NULL if an error occurs
3842 static Picture * remove_long(H264Context *h, int i){
3843 Picture *pic;
3845 pic= h->long_ref[i];
3846 h->long_ref[i]= NULL;
3847 if(pic) h->long_ref_count--;
3849 return pic;
3853 * print short term list
3855 static void print_short_term(H264Context *h) {
3856 uint32_t i;
3857 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3858 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3859 for(i=0; i<h->short_ref_count; i++){
3860 Picture *pic= h->short_ref[i];
3861 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3867 * print long term list
3869 static void print_long_term(H264Context *h) {
3870 uint32_t i;
3871 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3872 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3873 for(i = 0; i < 16; i++){
3874 Picture *pic= h->long_ref[i];
3875 if (pic) {
3876 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3883 * Executes the reference picture marking (memory management control operations).
3885 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3886 MpegEncContext * const s = &h->s;
3887 int i, j;
3888 int current_is_long=0;
3889 Picture *pic;
3891 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3892 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3894 for(i=0; i<mmco_count; i++){
3895 if(s->avctx->debug&FF_DEBUG_MMCO)
3896 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3898 switch(mmco[i].opcode){
3899 case MMCO_SHORT2UNUSED:
3900 pic= remove_short(h, mmco[i].short_frame_num);
3901 if(pic)
3902 unreference_pic(h, pic);
3903 else if(s->avctx->debug&FF_DEBUG_MMCO)
3904 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3905 break;
3906 case MMCO_SHORT2LONG:
3907 pic= remove_long(h, mmco[i].long_index);
3908 if(pic) unreference_pic(h, pic);
3910 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3911 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3912 h->long_ref_count++;
3913 break;
3914 case MMCO_LONG2UNUSED:
3915 pic= remove_long(h, mmco[i].long_index);
3916 if(pic)
3917 unreference_pic(h, pic);
3918 else if(s->avctx->debug&FF_DEBUG_MMCO)
3919 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
3920 break;
3921 case MMCO_LONG:
3922 pic= remove_long(h, mmco[i].long_index);
3923 if(pic) unreference_pic(h, pic);
3925 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3926 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3927 h->long_ref_count++;
3929 current_is_long=1;
3930 break;
3931 case MMCO_SET_MAX_LONG:
3932 assert(mmco[i].long_index <= 16);
3933 // just remove the long term which index is greater than new max
3934 for(j = mmco[i].long_index; j<16; j++){
3935 pic = remove_long(h, j);
3936 if (pic) unreference_pic(h, pic);
3938 break;
3939 case MMCO_RESET:
3940 while(h->short_ref_count){
3941 pic= remove_short(h, h->short_ref[0]->frame_num);
3942 unreference_pic(h, pic);
3944 for(j = 0; j < 16; j++) {
3945 pic= remove_long(h, j);
3946 if(pic) unreference_pic(h, pic);
3948 break;
3949 default: assert(0);
3953 if(!current_is_long){
3954 pic= remove_short(h, s->current_picture_ptr->frame_num);
3955 if(pic){
3956 unreference_pic(h, pic);
3957 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3960 if(h->short_ref_count)
3961 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3963 h->short_ref[0]= s->current_picture_ptr;
3964 h->short_ref[0]->long_ref=0;
3965 h->short_ref_count++;
3968 print_short_term(h);
3969 print_long_term(h);
3970 return 0;
3973 static int decode_ref_pic_marking(H264Context *h){
3974 MpegEncContext * const s = &h->s;
3975 int i;
3977 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3978 s->broken_link= get_bits1(&s->gb) -1;
3979 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
3980 if(h->mmco[0].long_index == -1)
3981 h->mmco_index= 0;
3982 else{
3983 h->mmco[0].opcode= MMCO_LONG;
3984 h->mmco_index= 1;
3986 }else{
3987 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
3988 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3989 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
3991 h->mmco[i].opcode= opcode;
3992 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3993 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3994 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
3995 fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
3996 return -1;
3999 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4000 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4001 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4002 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4003 return -1;
4007 if(opcode > MMCO_LONG){
4008 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4009 return -1;
4011 if(opcode == MMCO_END)
4012 break;
4014 h->mmco_index= i;
4015 }else{
4016 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4018 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4019 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4020 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
4021 h->mmco_index= 1;
4022 }else
4023 h->mmco_index= 0;
4027 return 0;
4030 static int init_poc(H264Context *h){
4031 MpegEncContext * const s = &h->s;
4032 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4033 int field_poc[2];
4035 if(h->nal_unit_type == NAL_IDR_SLICE){
4036 h->frame_num_offset= 0;
4037 }else{
4038 if(h->frame_num < h->prev_frame_num)
4039 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4040 else
4041 h->frame_num_offset= h->prev_frame_num_offset;
4044 if(h->sps.poc_type==0){
4045 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4047 if(h->nal_unit_type == NAL_IDR_SLICE){
4048 h->prev_poc_msb=
4049 h->prev_poc_lsb= 0;
4052 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4053 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4054 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4055 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4056 else
4057 h->poc_msb = h->prev_poc_msb;
4058 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4059 field_poc[0] =
4060 field_poc[1] = h->poc_msb + h->poc_lsb;
4061 if(s->picture_structure == PICT_FRAME)
4062 field_poc[1] += h->delta_poc_bottom;
4063 }else if(h->sps.poc_type==1){
4064 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4065 int i;
4067 if(h->sps.poc_cycle_length != 0)
4068 abs_frame_num = h->frame_num_offset + h->frame_num;
4069 else
4070 abs_frame_num = 0;
4072 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4073 abs_frame_num--;
4075 expected_delta_per_poc_cycle = 0;
4076 for(i=0; i < h->sps.poc_cycle_length; i++)
4077 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4079 if(abs_frame_num > 0){
4080 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4081 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4083 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4084 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4085 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4086 } else
4087 expectedpoc = 0;
4089 if(h->nal_ref_idc == 0)
4090 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4092 field_poc[0] = expectedpoc + h->delta_poc[0];
4093 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4095 if(s->picture_structure == PICT_FRAME)
4096 field_poc[1] += h->delta_poc[1];
4097 }else{
4098 int poc;
4099 if(h->nal_unit_type == NAL_IDR_SLICE){
4100 poc= 0;
4101 }else{
4102 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4103 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4105 field_poc[0]= poc;
4106 field_poc[1]= poc;
4109 if(s->picture_structure != PICT_BOTTOM_FIELD)
4110 s->current_picture_ptr->field_poc[0]= field_poc[0];
4111 if(s->picture_structure != PICT_TOP_FIELD)
4112 s->current_picture_ptr->field_poc[1]= field_poc[1];
4113 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4114 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4116 return 0;
4120 * decodes a slice header.
4121 * this will allso call MPV_common_init() and frame_start() as needed
4123 static int decode_slice_header(H264Context *h){
4124 MpegEncContext * const s = &h->s;
4125 int first_mb_in_slice, pps_id;
4126 int num_ref_idx_active_override_flag;
4127 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4128 int slice_type;
4129 int default_ref_list_done = 0;
4131 s->current_picture.reference= h->nal_ref_idc != 0;
4132 s->dropable= h->nal_ref_idc == 0;
4134 first_mb_in_slice= get_ue_golomb(&s->gb);
4136 slice_type= get_ue_golomb(&s->gb);
4137 if(slice_type > 9){
4138 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4139 return -1;
4141 if(slice_type > 4){
4142 slice_type -= 5;
4143 h->slice_type_fixed=1;
4144 }else
4145 h->slice_type_fixed=0;
4147 slice_type= slice_type_map[ slice_type ];
4148 if (slice_type == I_TYPE
4149 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4150 default_ref_list_done = 1;
4152 h->slice_type= slice_type;
4154 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4156 pps_id= get_ue_golomb(&s->gb);
4157 if(pps_id>255){
4158 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4159 return -1;
4161 h->pps= h->pps_buffer[pps_id];
4162 if(h->pps.slice_group_count == 0){
4163 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4164 return -1;
4167 h->sps= h->sps_buffer[ h->pps.sps_id ];
4168 if(h->sps.log2_max_frame_num == 0){
4169 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4170 return -1;
4173 s->mb_width= h->sps.mb_width;
4174 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4176 h->b_stride= s->mb_width*4 + 1;
4177 h->b8_stride= s->mb_width*2 + 1;
4179 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4180 if(h->sps.frame_mbs_only_flag)
4181 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4182 else
4183 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4185 if (s->context_initialized
4186 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4187 free_tables(h);
4188 MPV_common_end(s);
4190 if (!s->context_initialized) {
4191 if (MPV_common_init(s) < 0)
4192 return -1;
4194 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4195 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4196 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4197 }else{
4198 int i;
4199 for(i=0; i<16; i++){
4200 #define T(x) (x>>2) | ((x<<2) & 0xF)
4201 h->zigzag_scan[i] = T(zigzag_scan[i]);
4202 h-> field_scan[i] = T( field_scan[i]);
4205 if(h->sps.transform_bypass){ //FIXME same ugly
4206 h->zigzag_scan_q0 = zigzag_scan;
4207 h->field_scan_q0 = field_scan;
4208 }else{
4209 h->zigzag_scan_q0 = h->zigzag_scan;
4210 h->field_scan_q0 = h->field_scan;
4213 alloc_tables(h);
4215 s->avctx->width = s->width;
4216 s->avctx->height = s->height;
4217 s->avctx->sample_aspect_ratio= h->sps.sar;
4218 if(!s->avctx->sample_aspect_ratio.den)
4219 s->avctx->sample_aspect_ratio.den = 1;
4221 if(h->sps.timing_info_present_flag){
4222 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4226 if(h->slice_num == 0){
4227 frame_start(h);
4230 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4231 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4233 h->mb_aff_frame = 0;
4234 if(h->sps.frame_mbs_only_flag){
4235 s->picture_structure= PICT_FRAME;
4236 }else{
4237 if(get_bits1(&s->gb)) { //field_pic_flag
4238 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4239 } else {
4240 s->picture_structure= PICT_FRAME;
4241 first_mb_in_slice <<= h->sps.mb_aff;
4242 h->mb_aff_frame = h->sps.mb_aff;
4246 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4247 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4248 if(s->mb_y >= s->mb_height){
4249 return -1;
4252 if(s->picture_structure==PICT_FRAME){
4253 h->curr_pic_num= h->frame_num;
4254 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4255 }else{
4256 h->curr_pic_num= 2*h->frame_num;
4257 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4260 if(h->nal_unit_type == NAL_IDR_SLICE){
4261 get_ue_golomb(&s->gb); /* idr_pic_id */
4264 if(h->sps.poc_type==0){
4265 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4267 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4268 h->delta_poc_bottom= get_se_golomb(&s->gb);
4272 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4273 h->delta_poc[0]= get_se_golomb(&s->gb);
4275 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4276 h->delta_poc[1]= get_se_golomb(&s->gb);
4279 init_poc(h);
4281 if(h->pps.redundant_pic_cnt_present){
4282 h->redundant_pic_count= get_ue_golomb(&s->gb);
4285 //set defaults, might be overriden a few line later
4286 h->ref_count[0]= h->pps.ref_count[0];
4287 h->ref_count[1]= h->pps.ref_count[1];
4289 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4290 if(h->slice_type == B_TYPE){
4291 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4293 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4295 if(num_ref_idx_active_override_flag){
4296 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4297 if(h->slice_type==B_TYPE)
4298 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4300 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4301 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4302 return -1;
4307 if(!default_ref_list_done){
4308 fill_default_ref_list(h);
4311 if(decode_ref_pic_list_reordering(h) < 0)
4312 return -1;
4314 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4315 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4316 pred_weight_table(h);
4317 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4318 implicit_weight_table(h);
4319 else
4320 h->use_weight = 0;
4322 if(s->current_picture.reference)
4323 decode_ref_pic_marking(h);
4325 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4326 h->cabac_init_idc = get_ue_golomb(&s->gb);
4328 h->last_qscale_diff = 0;
4329 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4330 if(s->qscale<0 || s->qscale>51){
4331 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4332 return -1;
4334 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4335 //FIXME qscale / qp ... stuff
4336 if(h->slice_type == SP_TYPE){
4337 get_bits1(&s->gb); /* sp_for_switch_flag */
4339 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4340 get_se_golomb(&s->gb); /* slice_qs_delta */
4343 h->deblocking_filter = 1;
4344 h->slice_alpha_c0_offset = 0;
4345 h->slice_beta_offset = 0;
4346 if( h->pps.deblocking_filter_parameters_present ) {
4347 h->deblocking_filter= get_ue_golomb(&s->gb);
4348 if(h->deblocking_filter < 2)
4349 h->deblocking_filter^= 1; // 1<->0
4351 if( h->deblocking_filter ) {
4352 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4353 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4356 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4357 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4358 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4359 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4360 h->deblocking_filter= 0;
4362 #if 0 //FMO
4363 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4364 slice_group_change_cycle= get_bits(&s->gb, ?);
4365 #endif
4367 h->slice_num++;
4369 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4370 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4371 h->slice_num,
4372 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4373 first_mb_in_slice,
4374 av_get_pict_type_char(h->slice_type),
4375 pps_id, h->frame_num,
4376 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4377 h->ref_count[0], h->ref_count[1],
4378 s->qscale,
4379 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4380 h->use_weight,
4381 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4385 return 0;
4391 static inline int get_level_prefix(GetBitContext *gb){
4392 unsigned int buf;
4393 int log;
4395 OPEN_READER(re, gb);
4396 UPDATE_CACHE(re, gb);
4397 buf=GET_CACHE(re, gb);
4399 log= 32 - av_log2(buf);
4400 #ifdef TRACE
4401 print_bin(buf>>(32-log), log);
4402 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4403 #endif
4405 LAST_SKIP_BITS(re, gb, log);
4406 CLOSE_READER(re, gb);
4408 return log-1;
4411 static inline int get_dct8x8_allowed(H264Context *h){
4412 int i;
4413 for(i=0; i<4; i++){
4414 if(!IS_SUB_8X8(h->sub_mb_type[i])
4415 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4416 return 0;
4418 return 1;
4422 * decodes a residual block.
4423 * @param n block index
4424 * @param scantable scantable
4425 * @param max_coeff number of coefficients in the block
4426 * @return <0 if an error occured
4428 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
4429 MpegEncContext * const s = &h->s;
4430 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4431 int level[16], run[16];
4432 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
4434 //FIXME put trailing_onex into the context
4436 if(n == CHROMA_DC_BLOCK_INDEX){
4437 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4438 total_coeff= coeff_token>>2;
4439 }else{
4440 if(n == LUMA_DC_BLOCK_INDEX){
4441 total_coeff= pred_non_zero_count(h, 0);
4442 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4443 total_coeff= coeff_token>>2;
4444 }else{
4445 total_coeff= pred_non_zero_count(h, n);
4446 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4447 total_coeff= coeff_token>>2;
4448 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4452 //FIXME set last_non_zero?
4454 if(total_coeff==0)
4455 return 0;
4457 trailing_ones= coeff_token&3;
4458 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4459 assert(total_coeff<=16);
4461 for(i=0; i<trailing_ones; i++){
4462 level[i]= 1 - 2*get_bits1(gb);
4465 suffix_length= total_coeff > 10 && trailing_ones < 3;
4467 for(; i<total_coeff; i++){
4468 const int prefix= get_level_prefix(gb);
4469 int level_code, mask;
4471 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4472 if(suffix_length)
4473 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4474 else
4475 level_code= (prefix<<suffix_length); //part
4476 }else if(prefix==14){
4477 if(suffix_length)
4478 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4479 else
4480 level_code= prefix + get_bits(gb, 4); //part
4481 }else if(prefix==15){
4482 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4483 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4484 }else{
4485 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4486 return -1;
4489 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
4491 mask= -(level_code&1);
4492 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4494 if(suffix_length==0) suffix_length=1; //FIXME split first iteration
4496 #if 1
4497 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4498 #else
4499 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4500 /* ? == prefix > 2 or sth */
4501 #endif
4502 tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
4505 if(total_coeff == max_coeff)
4506 zeros_left=0;
4507 else{
4508 if(n == CHROMA_DC_BLOCK_INDEX)
4509 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4510 else
4511 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4514 for(i=0; i<total_coeff-1; i++){
4515 if(zeros_left <=0)
4516 break;
4517 else if(zeros_left < 7){
4518 run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4519 }else{
4520 run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4522 zeros_left -= run[i];
4525 if(zeros_left<0){
4526 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4527 return -1;
4530 for(; i<total_coeff-1; i++){
4531 run[i]= 0;
4534 run[i]= zeros_left;
4536 coeff_num=-1;
4537 if(n > 24){
4538 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4539 int j;
4541 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4542 j= scantable[ coeff_num ];
4544 block[j]= level[i];
4546 }else{
4547 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4548 int j;
4550 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4551 j= scantable[ coeff_num ];
4553 block[j]= level[i] * qmul[j];
4554 // printf("%d %d ", block[j], qmul[j]);
4557 return 0;
4561 * decodes a P_SKIP or B_SKIP macroblock
4563 static void decode_mb_skip(H264Context *h){
4564 MpegEncContext * const s = &h->s;
4565 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4566 int mb_type=0;
4568 memset(h->non_zero_count[mb_xy], 0, 16);
4569 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4571 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4572 h->mb_field_decoding_flag= get_bits1(&s->gb);
4574 if(h->mb_field_decoding_flag)
4575 mb_type|= MB_TYPE_INTERLACED;
4577 if( h->slice_type == B_TYPE )
4579 // just for fill_caches. pred_direct_motion will set the real mb_type
4580 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4582 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4583 pred_direct_motion(h, &mb_type);
4584 if(h->pps.cabac){
4585 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4586 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
4589 else
4591 int mx, my;
4592 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4594 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4595 pred_pskip_motion(h, &mx, &my);
4596 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4597 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4598 if(h->pps.cabac)
4599 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4602 write_back_motion(h, mb_type);
4603 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4604 s->current_picture.qscale_table[mb_xy]= s->qscale;
4605 h->slice_table[ mb_xy ]= h->slice_num;
4606 h->prev_mb_skipped= 1;
4610 * decodes a macroblock
4611 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4613 static int decode_mb_cavlc(H264Context *h){
4614 MpegEncContext * const s = &h->s;
4615 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4616 int mb_type, partition_count, cbp;
4617 int dct8x8_allowed= h->pps.transform_8x8_mode;
4619 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4621 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4622 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4623 down the code */
4624 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4625 if(s->mb_skip_run==-1)
4626 s->mb_skip_run= get_ue_golomb(&s->gb);
4628 if (s->mb_skip_run--) {
4629 decode_mb_skip(h);
4630 return 0;
4633 if(h->mb_aff_frame){
4634 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4635 h->mb_field_decoding_flag = get_bits1(&s->gb);
4636 }else
4637 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4639 h->prev_mb_skipped= 0;
4641 mb_type= get_ue_golomb(&s->gb);
4642 if(h->slice_type == B_TYPE){
4643 if(mb_type < 23){
4644 partition_count= b_mb_type_info[mb_type].partition_count;
4645 mb_type= b_mb_type_info[mb_type].type;
4646 }else{
4647 mb_type -= 23;
4648 goto decode_intra_mb;
4650 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4651 if(mb_type < 5){
4652 partition_count= p_mb_type_info[mb_type].partition_count;
4653 mb_type= p_mb_type_info[mb_type].type;
4654 }else{
4655 mb_type -= 5;
4656 goto decode_intra_mb;
4658 }else{
4659 assert(h->slice_type == I_TYPE);
4660 decode_intra_mb:
4661 if(mb_type > 25){
4662 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4663 return -1;
4665 partition_count=0;
4666 cbp= i_mb_type_info[mb_type].cbp;
4667 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4668 mb_type= i_mb_type_info[mb_type].type;
4671 if(h->mb_field_decoding_flag)
4672 mb_type |= MB_TYPE_INTERLACED;
4674 h->slice_table[ mb_xy ]= h->slice_num;
4676 if(IS_INTRA_PCM(mb_type)){
4677 unsigned int x, y;
4679 // we assume these blocks are very rare so we dont optimize it
4680 align_get_bits(&s->gb);
4682 // The pixels are stored in the same order as levels in h->mb array.
4683 for(y=0; y<16; y++){
4684 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4685 for(x=0; x<16; x++){
4686 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4687 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4690 for(y=0; y<8; y++){
4691 const int index= 256 + 4*(y&3) + 32*(y>>2);
4692 for(x=0; x<8; x++){
4693 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4694 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4697 for(y=0; y<8; y++){
4698 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4699 for(x=0; x<8; x++){
4700 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4701 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4705 // In deblocking, the quantizer is 0
4706 s->current_picture.qscale_table[mb_xy]= 0;
4707 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4708 // All coeffs are present
4709 memset(h->non_zero_count[mb_xy], 16, 16);
4711 s->current_picture.mb_type[mb_xy]= mb_type;
4712 return 0;
4715 fill_caches(h, mb_type, 0);
4717 //mb_pred
4718 if(IS_INTRA(mb_type)){
4719 // init_top_left_availability(h);
4720 if(IS_INTRA4x4(mb_type)){
4721 int i;
4722 int di = 1;
4723 if(dct8x8_allowed && get_bits1(&s->gb)){
4724 mb_type |= MB_TYPE_8x8DCT;
4725 di = 4;
4728 // fill_intra4x4_pred_table(h);
4729 for(i=0; i<16; i+=di){
4730 const int mode_coded= !get_bits1(&s->gb);
4731 const int predicted_mode= pred_intra_mode(h, i);
4732 int mode;
4734 if(mode_coded){
4735 const int rem_mode= get_bits(&s->gb, 3);
4736 if(rem_mode<predicted_mode)
4737 mode= rem_mode;
4738 else
4739 mode= rem_mode + 1;
4740 }else{
4741 mode= predicted_mode;
4744 if(di==4)
4745 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4746 else
4747 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4749 write_back_intra_pred_mode(h);
4750 if( check_intra4x4_pred_mode(h) < 0)
4751 return -1;
4752 }else{
4753 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4754 if(h->intra16x16_pred_mode < 0)
4755 return -1;
4757 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4759 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4760 if(h->chroma_pred_mode < 0)
4761 return -1;
4762 }else if(partition_count==4){
4763 int i, j, sub_partition_count[4], list, ref[2][4];
4765 if(h->slice_type == B_TYPE){
4766 for(i=0; i<4; i++){
4767 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4768 if(h->sub_mb_type[i] >=13){
4769 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4770 return -1;
4772 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4773 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4775 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4776 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4777 pred_direct_motion(h, &mb_type);
4778 }else{
4779 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4780 for(i=0; i<4; i++){
4781 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4782 if(h->sub_mb_type[i] >=4){
4783 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4784 return -1;
4786 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4787 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4791 for(list=0; list<2; list++){
4792 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4793 if(ref_count == 0) continue;
4794 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4795 ref_count <<= 1;
4797 for(i=0; i<4; i++){
4798 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4799 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4800 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4801 }else{
4802 //FIXME
4803 ref[list][i] = -1;
4808 if(dct8x8_allowed)
4809 dct8x8_allowed = get_dct8x8_allowed(h);
4811 for(list=0; list<2; list++){
4812 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4813 if(ref_count == 0) continue;
4815 for(i=0; i<4; i++){
4816 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4817 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4818 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4820 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4821 const int sub_mb_type= h->sub_mb_type[i];
4822 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4823 for(j=0; j<sub_partition_count[i]; j++){
4824 int mx, my;
4825 const int index= 4*i + block_width*j;
4826 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4827 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4828 mx += get_se_golomb(&s->gb);
4829 my += get_se_golomb(&s->gb);
4830 tprintf("final mv:%d %d\n", mx, my);
4832 if(IS_SUB_8X8(sub_mb_type)){
4833 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4834 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4835 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4836 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4837 }else if(IS_SUB_8X4(sub_mb_type)){
4838 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4839 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4840 }else if(IS_SUB_4X8(sub_mb_type)){
4841 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4842 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4843 }else{
4844 assert(IS_SUB_4X4(sub_mb_type));
4845 mv_cache[ 0 ][0]= mx;
4846 mv_cache[ 0 ][1]= my;
4849 }else{
4850 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4851 p[0] = p[1]=
4852 p[8] = p[9]= 0;
4856 }else if(IS_DIRECT(mb_type)){
4857 pred_direct_motion(h, &mb_type);
4858 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4859 }else{
4860 int list, mx, my, i;
4861 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4862 if(IS_16X16(mb_type)){
4863 for(list=0; list<2; list++){
4864 if(h->ref_count[list]>0){
4865 if(IS_DIR(mb_type, 0, list)){
4866 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4867 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4868 }else
4869 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4872 for(list=0; list<2; list++){
4873 if(IS_DIR(mb_type, 0, list)){
4874 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4875 mx += get_se_golomb(&s->gb);
4876 my += get_se_golomb(&s->gb);
4877 tprintf("final mv:%d %d\n", mx, my);
4879 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4880 }else
4881 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4884 else if(IS_16X8(mb_type)){
4885 for(list=0; list<2; list++){
4886 if(h->ref_count[list]>0){
4887 for(i=0; i<2; i++){
4888 if(IS_DIR(mb_type, i, list)){
4889 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4890 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4891 }else
4892 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4896 for(list=0; list<2; list++){
4897 for(i=0; i<2; i++){
4898 if(IS_DIR(mb_type, i, list)){
4899 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4900 mx += get_se_golomb(&s->gb);
4901 my += get_se_golomb(&s->gb);
4902 tprintf("final mv:%d %d\n", mx, my);
4904 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4905 }else
4906 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
4909 }else{
4910 assert(IS_8X16(mb_type));
4911 for(list=0; list<2; list++){
4912 if(h->ref_count[list]>0){
4913 for(i=0; i<2; i++){
4914 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4915 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4916 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4917 }else
4918 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
4922 for(list=0; list<2; list++){
4923 for(i=0; i<2; i++){
4924 if(IS_DIR(mb_type, i, list)){
4925 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4926 mx += get_se_golomb(&s->gb);
4927 my += get_se_golomb(&s->gb);
4928 tprintf("final mv:%d %d\n", mx, my);
4930 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
4931 }else
4932 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
4938 if(IS_INTER(mb_type))
4939 write_back_motion(h, mb_type);
4941 if(!IS_INTRA16x16(mb_type)){
4942 cbp= get_ue_golomb(&s->gb);
4943 if(cbp > 47){
4944 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
4945 return -1;
4948 if(IS_INTRA4x4(mb_type))
4949 cbp= golomb_to_intra4x4_cbp[cbp];
4950 else
4951 cbp= golomb_to_inter_cbp[cbp];
4954 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4955 if(get_bits1(&s->gb))
4956 mb_type |= MB_TYPE_8x8DCT;
4958 s->current_picture.mb_type[mb_xy]= mb_type;
4960 if(cbp || IS_INTRA16x16(mb_type)){
4961 int i8x8, i4x4, chroma_idx;
4962 int chroma_qp, dquant;
4963 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4964 const uint8_t *scan, *dc_scan;
4966 // fill_non_zero_count_cache(h);
4968 if(IS_INTERLACED(mb_type)){
4969 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4970 dc_scan= luma_dc_field_scan;
4971 }else{
4972 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4973 dc_scan= luma_dc_zigzag_scan;
4976 dquant= get_se_golomb(&s->gb);
4978 if( dquant > 25 || dquant < -26 ){
4979 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4980 return -1;
4983 s->qscale += dquant;
4984 if(((unsigned)s->qscale) > 51){
4985 if(s->qscale<0) s->qscale+= 52;
4986 else s->qscale-= 52;
4989 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4990 if(IS_INTRA16x16(mb_type)){
4991 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){
4992 return -1; //FIXME continue if partitioned and other return -1 too
4995 assert((cbp&15) == 0 || (cbp&15) == 15);
4997 if(cbp&15){
4998 for(i8x8=0; i8x8<4; i8x8++){
4999 for(i4x4=0; i4x4<4; i4x4++){
5000 const int index= i4x4 + 4*i8x8;
5001 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){
5002 return -1;
5006 }else{
5007 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5009 }else{
5010 for(i8x8=0; i8x8<4; i8x8++){
5011 if(cbp & (1<<i8x8)){
5012 if(IS_8x8DCT(mb_type)){
5013 DCTELEM *buf = &h->mb[64*i8x8];
5014 uint8_t *nnz;
5015 for(i4x4=0; i4x4<4; i4x4++){
5016 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5017 h->dequant8_coeff[s->qscale], 16) <0 )
5018 return -1;
5020 if(s->qscale < 12){
5021 int i;
5022 for(i=0; i<64; i++)
5023 buf[i] = (buf[i] + 2) >> 2;
5025 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5026 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5027 }else{
5028 for(i4x4=0; i4x4<4; i4x4++){
5029 const int index= i4x4 + 4*i8x8;
5031 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){
5032 return -1;
5036 }else{
5037 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5038 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5043 if(cbp&0x30){
5044 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5045 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){
5046 return -1;
5050 if(cbp&0x20){
5051 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5052 for(i4x4=0; i4x4<4; i4x4++){
5053 const int index= 16 + 4*chroma_idx + i4x4;
5054 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){
5055 return -1;
5059 }else{
5060 uint8_t * const nnz= &h->non_zero_count_cache[0];
5061 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5062 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5064 }else{
5065 uint8_t * const nnz= &h->non_zero_count_cache[0];
5066 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5067 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5068 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5070 s->current_picture.qscale_table[mb_xy]= s->qscale;
5071 write_back_non_zero_count(h);
5073 return 0;
5076 static int decode_cabac_field_decoding_flag(H264Context *h) {
5077 MpegEncContext * const s = &h->s;
5078 const int mb_x = s->mb_x;
5079 const int mb_y = s->mb_y & ~1;
5080 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5081 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5083 unsigned int ctx = 0;
5085 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5086 ctx += 1;
5088 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5089 ctx += 1;
5092 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
5095 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5096 uint8_t *state= &h->cabac_state[ctx_base];
5097 int mb_type;
5099 if(intra_slice){
5100 MpegEncContext * const s = &h->s;
5101 const int mba_xy = h->left_mb_xy[0];
5102 const int mbb_xy = h->top_mb_xy;
5103 int ctx=0;
5104 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5105 ctx++;
5106 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5107 ctx++;
5108 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5109 return 0; /* I4x4 */
5110 state += 2;
5111 }else{
5112 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5113 return 0; /* I4x4 */
5116 if( get_cabac_terminate( &h->cabac ) )
5117 return 25; /* PCM */
5119 mb_type = 1; /* I16x16 */
5120 if( get_cabac( &h->cabac, &state[1] ) )
5121 mb_type += 12; /* cbp_luma != 0 */
5123 if( get_cabac( &h->cabac, &state[2] ) ) {
5124 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5125 mb_type += 4 * 2; /* cbp_chroma == 2 */
5126 else
5127 mb_type += 4 * 1; /* cbp_chroma == 1 */
5129 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5130 mb_type += 2;
5131 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
5132 mb_type += 1;
5133 return mb_type;
5136 static int decode_cabac_mb_type( H264Context *h ) {
5137 MpegEncContext * const s = &h->s;
5139 if( h->slice_type == I_TYPE ) {
5140 return decode_cabac_intra_mb_type(h, 3, 1);
5141 } else if( h->slice_type == P_TYPE ) {
5142 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5143 /* P-type */
5144 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5145 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5146 return 0; /* P_L0_D16x16; */
5147 else
5148 return 3; /* P_8x8; */
5149 } else {
5150 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5151 return 2; /* P_L0_D8x16; */
5152 else
5153 return 1; /* P_L0_D16x8; */
5155 } else {
5156 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5158 } else if( h->slice_type == B_TYPE ) {
5159 const int mba_xy = h->left_mb_xy[0];
5160 const int mbb_xy = h->top_mb_xy;
5161 int ctx = 0;
5162 int bits;
5164 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5165 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5166 ctx++;
5167 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5168 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5169 ctx++;
5171 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5172 return 0; /* B_Direct_16x16 */
5174 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5175 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5178 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5179 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5180 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5181 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5182 if( bits < 8 )
5183 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5184 else if( bits == 13 ) {
5185 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5186 } else if( bits == 14 )
5187 return 11; /* B_L1_L0_8x16 */
5188 else if( bits == 15 )
5189 return 22; /* B_8x8 */
5191 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5192 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5193 } else {
5194 /* TODO SI/SP frames? */
5195 return -1;
5199 static int decode_cabac_mb_skip( H264Context *h) {
5200 MpegEncContext * const s = &h->s;
5201 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5202 const int mba_xy = mb_xy - 1;
5203 const int mbb_xy = mb_xy - s->mb_stride;
5204 int ctx = 0;
5206 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5207 ctx++;
5208 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5209 ctx++;
5211 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5212 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5213 else /* B-frame */
5214 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
5217 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5218 int mode = 0;
5220 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5221 return pred_mode;
5223 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5224 mode += 1;
5225 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5226 mode += 2;
5227 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5228 mode += 4;
5229 if( mode >= pred_mode )
5230 return mode + 1;
5231 else
5232 return mode;
5235 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5236 const int mba_xy = h->left_mb_xy[0];
5237 const int mbb_xy = h->top_mb_xy;
5239 int ctx = 0;
5241 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5242 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5243 ctx++;
5245 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5246 ctx++;
5248 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5249 return 0;
5251 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5252 return 1;
5253 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5254 return 2;
5255 else
5256 return 3;
/* column (x) of each of the 16 4x4 luma blocks, indexed by block number */
static const uint8_t block_idx_x[16] = {
    0, 1, 0, 1,
    2, 3, 2, 3,
    0, 1, 0, 1,
    2, 3, 2, 3
};

/* row (y) of each of the 16 4x4 luma blocks, indexed by block number */
static const uint8_t block_idx_y[16] = {
    0, 0, 1, 1,
    0, 0, 1, 1,
    2, 2, 3, 3,
    2, 2, 3, 3
};

/* inverse mapping: block number of the 4x4 luma block at [x][y] */
static const uint8_t block_idx_xy[4][4] = {
    { 0, 2,  8, 10 },
    { 1, 3,  9, 11 },
    { 4, 6, 12, 14 },
    { 5, 7, 13, 15 }
};
5272 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5273 MpegEncContext * const s = &h->s;
5275 int cbp = 0;
5276 int i8x8;
5278 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5279 int cbp_a = -1;
5280 int cbp_b = -1;
5281 int x, y;
5282 int ctx = 0;
5284 x = block_idx_x[4*i8x8];
5285 y = block_idx_y[4*i8x8];
5287 if( x > 0 )
5288 cbp_a = cbp;
5289 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5290 cbp_a = h->left_cbp;
5291 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5294 if( y > 0 )
5295 cbp_b = cbp;
5296 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5297 cbp_b = h->top_cbp;
5298 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5301 /* No need to test for skip as we put 0 for skip block */
5302 /* No need to test for IPCM as we put 1 for IPCM block */
5303 if( cbp_a >= 0 ) {
5304 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5305 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5306 ctx++;
5309 if( cbp_b >= 0 ) {
5310 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5311 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5312 ctx += 2;
5315 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5316 cbp |= 1 << i8x8;
5319 return cbp;
5321 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5322 int ctx;
5323 int cbp_a, cbp_b;
5325 cbp_a = (h->left_cbp>>4)&0x03;
5326 cbp_b = (h-> top_cbp>>4)&0x03;
5328 ctx = 0;
5329 if( cbp_a > 0 ) ctx++;
5330 if( cbp_b > 0 ) ctx += 2;
5331 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5332 return 0;
5334 ctx = 4;
5335 if( cbp_a == 2 ) ctx++;
5336 if( cbp_b == 2 ) ctx += 2;
5337 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
5339 static int decode_cabac_mb_dqp( H264Context *h) {
5340 MpegEncContext * const s = &h->s;
5341 int mbn_xy;
5342 int ctx = 0;
5343 int val = 0;
5345 if( s->mb_x > 0 )
5346 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5347 else
5348 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5350 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5351 ctx++;
5353 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5354 if( ctx < 2 )
5355 ctx = 2;
5356 else
5357 ctx = 3;
5358 val++;
5361 if( val&0x01 )
5362 return (val + 1)/2;
5363 else
5364 return -(val + 1)/2;
5366 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5367 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5368 return 0; /* 8x8 */
5369 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5370 return 1; /* 8x4 */
5371 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5372 return 2; /* 4x8 */
5373 return 3; /* 4x4 */
5375 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5376 int type;
5377 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5378 return 0; /* B_Direct_8x8 */
5379 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5380 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5381 type = 3;
5382 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5383 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5384 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5385 type += 4;
5387 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5388 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5389 return type;
5392 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5393 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5396 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5397 int refa = h->ref_cache[list][scan8[n] - 1];
5398 int refb = h->ref_cache[list][scan8[n] - 8];
5399 int ref = 0;
5400 int ctx = 0;
5402 if( h->slice_type == B_TYPE) {
5403 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5404 ctx++;
5405 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5406 ctx += 2;
5407 } else {
5408 if( refa > 0 )
5409 ctx++;
5410 if( refb > 0 )
5411 ctx += 2;
5414 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5415 ref++;
5416 if( ctx < 4 )
5417 ctx = 4;
5418 else
5419 ctx = 5;
5421 return ref;
5424 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5425 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5426 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5427 int ctxbase = (l == 0) ? 40 : 47;
5428 int ctx, mvd;
5430 if( amvd < 3 )
5431 ctx = 0;
5432 else if( amvd > 32 )
5433 ctx = 2;
5434 else
5435 ctx = 1;
5437 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5438 return 0;
5440 mvd= 1;
5441 ctx= 3;
5442 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5443 mvd++;
5444 if( ctx < 6 )
5445 ctx++;
5448 if( mvd >= 9 ) {
5449 int k = 3;
5450 while( get_cabac_bypass( &h->cabac ) ) {
5451 mvd += 1 << k;
5452 k++;
5454 while( k-- ) {
5455 if( get_cabac_bypass( &h->cabac ) )
5456 mvd += 1 << k;
5459 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
5460 else return mvd;
5463 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5464 int nza, nzb;
5465 int ctx = 0;
5467 if( cat == 0 ) {
5468 nza = h->left_cbp&0x100;
5469 nzb = h-> top_cbp&0x100;
5470 } else if( cat == 1 || cat == 2 ) {
5471 nza = h->non_zero_count_cache[scan8[idx] - 1];
5472 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5473 } else if( cat == 3 ) {
5474 nza = (h->left_cbp>>(6+idx))&0x01;
5475 nzb = (h-> top_cbp>>(6+idx))&0x01;
5476 } else {
5477 assert(cat == 4);
5478 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5479 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5482 if( nza > 0 )
5483 ctx++;
5485 if( nzb > 0 )
5486 ctx += 2;
5488 return ctx + 4 * cat;
5491 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
5492 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5493 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5494 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5495 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5496 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5497 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
5498 static const int identity[15] = {
5499 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
5501 static const int significant_coeff_flag_offset_8x8[63] = {
5502 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5503 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5504 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5505 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5507 static const int last_coeff_flag_offset_8x8[63] = {
5508 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5509 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5510 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5511 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5514 int index[64];
5516 int i, last;
5517 int coeff_count = 0;
5519 int abslevel1 = 1;
5520 int abslevelgt1 = 0;
5522 const int* significant_coeff_ctx_offset;
5523 const int* last_coeff_ctx_offset;
5524 const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat]
5525 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5526 const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat]
5527 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5529 /* cat: 0-> DC 16x16 n = 0
5530 * 1-> AC 16x16 n = luma4x4idx
5531 * 2-> Luma4x4 n = luma4x4idx
5532 * 3-> DC Chroma n = iCbCr
5533 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5534 * 5-> Luma8x8 n = 4 * luma8x8idx
5537 /* read coded block flag */
5538 if( cat == 5 ) {
5539 significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8;
5540 last_coeff_ctx_offset = last_coeff_flag_offset_8x8;
5541 } else {
5542 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5543 if( cat == 1 || cat == 2 )
5544 h->non_zero_count_cache[scan8[n]] = 0;
5545 else if( cat == 4 )
5546 h->non_zero_count_cache[scan8[16+n]] = 0;
5548 return 0;
5551 significant_coeff_ctx_offset =
5552 last_coeff_ctx_offset = identity;
5555 for(last= 0; last < max_coeff - 1; last++) {
5556 int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last];
5557 if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) {
5558 int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last];
5559 index[coeff_count++] = last;
5560 if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) {
5561 last= max_coeff;
5562 break;
5566 if( last == max_coeff -1 ) {
5567 index[coeff_count++] = last;
5569 assert(coeff_count > 0);
5571 if( cat == 0 )
5572 h->cbp_table[mb_xy] |= 0x100;
5573 else if( cat == 1 || cat == 2 )
5574 h->non_zero_count_cache[scan8[n]] = coeff_count;
5575 else if( cat == 3 )
5576 h->cbp_table[mb_xy] |= 0x40 << n;
5577 else if( cat == 4 )
5578 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5579 else {
5580 assert( cat == 5 );
5581 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
5584 for( i = coeff_count - 1; i >= 0; i-- ) {
5585 int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat];
5586 int j= scantable[index[i]];
5588 if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) {
5589 if( cat == 0 || cat == 3 ) {
5590 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5591 else block[j] = 1;
5592 }else{
5593 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
5594 else block[j] = qmul[j];
5597 abslevel1++;
5598 } else {
5599 int coeff_abs = 2;
5600 ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
5601 while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) {
5602 coeff_abs++;
5605 if( coeff_abs >= 15 ) {
5606 int j = 0;
5607 while( get_cabac_bypass( &h->cabac ) ) {
5608 coeff_abs += 1 << j;
5609 j++;
5612 while( j-- ) {
5613 if( get_cabac_bypass( &h->cabac ) )
5614 coeff_abs += 1 << j ;
5618 if( cat == 0 || cat == 3 ) {
5619 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5620 else block[j] = coeff_abs;
5621 }else{
5622 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
5623 else block[j] = coeff_abs * qmul[j];
5626 abslevelgt1++;
5629 return 0;
5632 void inline compute_mb_neighboors(H264Context *h)
5634 MpegEncContext * const s = &h->s;
5635 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5636 h->top_mb_xy = mb_xy - s->mb_stride;
5637 h->left_mb_xy[0] = mb_xy - 1;
5638 if(h->mb_aff_frame){
5639 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5640 const int top_pair_xy = pair_xy - s->mb_stride;
5641 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5642 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5643 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5644 const int bottom = (s->mb_y & 1);
5645 if (bottom
5646 ? !curr_mb_frame_flag // bottom macroblock
5647 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5649 h->top_mb_xy -= s->mb_stride;
5651 if (left_mb_frame_flag != curr_mb_frame_flag) {
5652 h->left_mb_xy[0] = pair_xy - 1;
5655 return;
/**
 * decodes a macroblock
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
5662 static int decode_mb_cabac(H264Context *h) {
5663 MpegEncContext * const s = &h->s;
5664 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5665 int mb_type, partition_count, cbp = 0;
5666 int dct8x8_allowed= h->pps.transform_8x8_mode;
5668 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5670 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5671 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5672 /* read skip flags */
5673 if( decode_cabac_mb_skip( h ) ) {
5674 decode_mb_skip(h);
5676 h->cbp_table[mb_xy] = 0;
5677 h->chroma_pred_mode_table[mb_xy] = 0;
5678 h->last_qscale_diff = 0;
5680 return 0;
5684 if(h->mb_aff_frame){
5685 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5686 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5687 }else
5688 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5690 h->prev_mb_skipped = 0;
5692 compute_mb_neighboors(h);
5693 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5694 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5695 return -1;
5698 if( h->slice_type == B_TYPE ) {
5699 if( mb_type < 23 ){
5700 partition_count= b_mb_type_info[mb_type].partition_count;
5701 mb_type= b_mb_type_info[mb_type].type;
5702 }else{
5703 mb_type -= 23;
5704 goto decode_intra_mb;
5706 } else if( h->slice_type == P_TYPE ) {
5707 if( mb_type < 5) {
5708 partition_count= p_mb_type_info[mb_type].partition_count;
5709 mb_type= p_mb_type_info[mb_type].type;
5710 } else {
5711 mb_type -= 5;
5712 goto decode_intra_mb;
5714 } else {
5715 assert(h->slice_type == I_TYPE);
5716 decode_intra_mb:
5717 partition_count = 0;
5718 cbp= i_mb_type_info[mb_type].cbp;
5719 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5720 mb_type= i_mb_type_info[mb_type].type;
5722 if(h->mb_field_decoding_flag)
5723 mb_type |= MB_TYPE_INTERLACED;
5725 h->slice_table[ mb_xy ]= h->slice_num;
5727 if(IS_INTRA_PCM(mb_type)) {
5728 const uint8_t *ptr;
5729 unsigned int x, y;
5731 // We assume these blocks are very rare so we dont optimize it.
5732 // FIXME The two following lines get the bitstream position in the cabac
5733 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5734 ptr= h->cabac.bytestream;
5735 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5737 // The pixels are stored in the same order as levels in h->mb array.
5738 for(y=0; y<16; y++){
5739 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5740 for(x=0; x<16; x++){
5741 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5742 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5745 for(y=0; y<8; y++){
5746 const int index= 256 + 4*(y&3) + 32*(y>>2);
5747 for(x=0; x<8; x++){
5748 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5749 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5752 for(y=0; y<8; y++){
5753 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5754 for(x=0; x<8; x++){
5755 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5756 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5760 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5762 // All blocks are present
5763 h->cbp_table[mb_xy] = 0x1ef;
5764 h->chroma_pred_mode_table[mb_xy] = 0;
5765 // In deblocking, the quantizer is 0
5766 s->current_picture.qscale_table[mb_xy]= 0;
5767 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5768 // All coeffs are present
5769 memset(h->non_zero_count[mb_xy], 16, 16);
5770 s->current_picture.mb_type[mb_xy]= mb_type;
5771 return 0;
5774 fill_caches(h, mb_type, 0);
5776 if( IS_INTRA( mb_type ) ) {
5777 int i;
5778 if( IS_INTRA4x4( mb_type ) ) {
5779 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5780 mb_type |= MB_TYPE_8x8DCT;
5781 for( i = 0; i < 16; i+=4 ) {
5782 int pred = pred_intra_mode( h, i );
5783 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5784 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5786 } else {
5787 for( i = 0; i < 16; i++ ) {
5788 int pred = pred_intra_mode( h, i );
5789 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5791 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5794 write_back_intra_pred_mode(h);
5795 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5796 } else {
5797 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5798 if( h->intra16x16_pred_mode < 0 ) return -1;
5800 h->chroma_pred_mode_table[mb_xy] =
5801 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5803 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5804 if( h->chroma_pred_mode < 0 ) return -1;
5805 } else if( partition_count == 4 ) {
5806 int i, j, sub_partition_count[4], list, ref[2][4];
5808 if( h->slice_type == B_TYPE ) {
5809 for( i = 0; i < 4; i++ ) {
5810 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5811 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5812 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5814 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5815 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5816 pred_direct_motion(h, &mb_type);
5817 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5818 for( i = 0; i < 4; i++ )
5819 if( IS_DIRECT(h->sub_mb_type[i]) )
5820 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5823 } else {
5824 for( i = 0; i < 4; i++ ) {
5825 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5826 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5827 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5831 for( list = 0; list < 2; list++ ) {
5832 if( h->ref_count[list] > 0 ) {
5833 for( i = 0; i < 4; i++ ) {
5834 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5835 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5836 if( h->ref_count[list] > 1 )
5837 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5838 else
5839 ref[list][i] = 0;
5840 } else {
5841 ref[list][i] = -1;
5843 h->ref_cache[list][ scan8[4*i]+1 ]=
5844 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5849 if(dct8x8_allowed)
5850 dct8x8_allowed = get_dct8x8_allowed(h);
5852 for(list=0; list<2; list++){
5853 for(i=0; i<4; i++){
5854 if(IS_DIRECT(h->sub_mb_type[i])){
5855 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5856 continue;
5858 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5860 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5861 const int sub_mb_type= h->sub_mb_type[i];
5862 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5863 for(j=0; j<sub_partition_count[i]; j++){
5864 int mpx, mpy;
5865 int mx, my;
5866 const int index= 4*i + block_width*j;
5867 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5868 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5869 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5871 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5872 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5873 tprintf("final mv:%d %d\n", mx, my);
5875 if(IS_SUB_8X8(sub_mb_type)){
5876 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5877 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5878 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5879 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5881 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5882 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5883 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5884 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5885 }else if(IS_SUB_8X4(sub_mb_type)){
5886 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5887 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5889 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5890 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5891 }else if(IS_SUB_4X8(sub_mb_type)){
5892 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5893 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5895 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5896 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5897 }else{
5898 assert(IS_SUB_4X4(sub_mb_type));
5899 mv_cache[ 0 ][0]= mx;
5900 mv_cache[ 0 ][1]= my;
5902 mvd_cache[ 0 ][0]= mx - mpx;
5903 mvd_cache[ 0 ][1]= my - mpy;
5906 }else{
5907 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5908 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5909 p[0] = p[1] = p[8] = p[9] = 0;
5910 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5914 } else if( IS_DIRECT(mb_type) ) {
5915 pred_direct_motion(h, &mb_type);
5916 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5917 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5918 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5919 } else {
5920 int list, mx, my, i, mpx, mpy;
5921 if(IS_16X16(mb_type)){
5922 for(list=0; list<2; list++){
5923 if(IS_DIR(mb_type, 0, list)){
5924 if(h->ref_count[list] > 0 ){
5925 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5926 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5928 }else
5929 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
5931 for(list=0; list<2; list++){
5932 if(IS_DIR(mb_type, 0, list)){
5933 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5935 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5936 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5937 tprintf("final mv:%d %d\n", mx, my);
5939 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5940 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5941 }else
5942 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5945 else if(IS_16X8(mb_type)){
5946 for(list=0; list<2; list++){
5947 if(h->ref_count[list]>0){
5948 for(i=0; i<2; i++){
5949 if(IS_DIR(mb_type, i, list)){
5950 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5951 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5952 }else
5953 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5957 for(list=0; list<2; list++){
5958 for(i=0; i<2; i++){
5959 if(IS_DIR(mb_type, i, list)){
5960 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5961 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5962 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5963 tprintf("final mv:%d %d\n", mx, my);
5965 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5966 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5967 }else{
5968 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5969 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5973 }else{
5974 assert(IS_8X16(mb_type));
5975 for(list=0; list<2; list++){
5976 if(h->ref_count[list]>0){
5977 for(i=0; i<2; i++){
5978 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5979 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5980 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5981 }else
5982 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5986 for(list=0; list<2; list++){
5987 for(i=0; i<2; i++){
5988 if(IS_DIR(mb_type, i, list)){
5989 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5990 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5991 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5993 tprintf("final mv:%d %d\n", mx, my);
5994 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5995 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5996 }else{
5997 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5998 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6005 if( IS_INTER( mb_type ) ) {
6006 h->chroma_pred_mode_table[mb_xy] = 0;
6007 write_back_motion( h, mb_type );
6010 if( !IS_INTRA16x16( mb_type ) ) {
6011 cbp = decode_cabac_mb_cbp_luma( h );
6012 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6015 h->cbp_table[mb_xy] = cbp;
6017 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6018 if( decode_cabac_mb_transform_size( h ) )
6019 mb_type |= MB_TYPE_8x8DCT;
6021 s->current_picture.mb_type[mb_xy]= mb_type;
6023 if( cbp || IS_INTRA16x16( mb_type ) ) {
6024 const uint8_t *scan, *dc_scan;
6025 int dqp;
6027 if(IS_INTERLACED(mb_type)){
6028 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6029 dc_scan= luma_dc_field_scan;
6030 }else{
6031 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6032 dc_scan= luma_dc_zigzag_scan;
6035 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6036 s->qscale += dqp;
6037 if(((unsigned)s->qscale) > 51){
6038 if(s->qscale<0) s->qscale+= 52;
6039 else s->qscale-= 52;
6041 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6043 if( IS_INTRA16x16( mb_type ) ) {
6044 int i;
6045 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6046 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0)
6047 return -1;
6048 if( cbp&15 ) {
6049 for( i = 0; i < 16; i++ ) {
6050 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6051 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 )
6052 return -1;
6054 } else {
6055 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6057 } else {
6058 int i8x8, i4x4;
6059 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6060 if( cbp & (1<<i8x8) ) {
6061 if( IS_8x8DCT(mb_type) ) {
6062 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6063 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
6064 return -1;
6065 if(s->qscale < 12){
6066 int i;
6067 for(i=0; i<64; i++)
6068 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6070 } else
6071 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6072 const int index = 4*i8x8 + i4x4;
6073 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6074 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 )
6075 return -1;
6077 } else {
6078 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6079 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6084 if( cbp&0x30 ){
6085 int c;
6086 for( c = 0; c < 2; c++ ) {
6087 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6088 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->dequant4_coeff[h->chroma_qp], 4) < 0)
6089 return -1;
6093 if( cbp&0x20 ) {
6094 int c, i;
6095 for( c = 0; c < 2; c++ ) {
6096 for( i = 0; i < 4; i++ ) {
6097 const int index = 16 + 4 * c + i;
6098 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6099 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0)
6100 return -1;
6103 } else {
6104 uint8_t * const nnz= &h->non_zero_count_cache[0];
6105 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6106 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6108 } else {
6109 uint8_t * const nnz= &h->non_zero_count_cache[0];
6110 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6111 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6112 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6115 s->current_picture.qscale_table[mb_xy]= s->qscale;
6116 write_back_non_zero_count(h);
6118 return 0;
6122 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6123 int i, d;
6124 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6125 const int alpha = alpha_table[index_a];
6126 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6128 if( bS[0] < 4 ) {
6129 int8_t tc[4];
6130 for(i=0; i<4; i++)
6131 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6132 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6133 } else {
6134 /* 16px edge length, because bS=4 is triggered by being at
6135 * the edge of an intra MB, so all 4 bS are the same */
6136 for( d = 0; d < 16; d++ ) {
6137 const int p0 = pix[-1];
6138 const int p1 = pix[-2];
6139 const int p2 = pix[-3];
6141 const int q0 = pix[0];
6142 const int q1 = pix[1];
6143 const int q2 = pix[2];
6145 if( ABS( p0 - q0 ) < alpha &&
6146 ABS( p1 - p0 ) < beta &&
6147 ABS( q1 - q0 ) < beta ) {
6149 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6150 if( ABS( p2 - p0 ) < beta)
6152 const int p3 = pix[-4];
6153 /* p0', p1', p2' */
6154 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6155 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6156 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6157 } else {
6158 /* p0' */
6159 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6161 if( ABS( q2 - q0 ) < beta)
6163 const int q3 = pix[3];
6164 /* q0', q1', q2' */
6165 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6166 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6167 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6168 } else {
6169 /* q0' */
6170 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6172 }else{
6173 /* p0', q0' */
6174 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6175 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6177 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6179 pix += stride;
6183 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6184 int i, d;
6185 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6186 const int alpha = alpha_table[index_a];
6187 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6189 if( bS[0] < 4 ) {
6190 int8_t tc[4];
6191 for(i=0; i<4; i++)
6192 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6193 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6194 } else {
6195 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6199 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6200 int i;
6201 for( i = 0; i < 16; i++, pix += stride) {
6202 int index_a;
6203 int alpha;
6204 int beta;
6206 int qp_index;
6207 int bS_index = (i >> 1);
6208 if (h->mb_field_decoding_flag) {
6209 bS_index &= ~1;
6210 bS_index |= (i & 1);
6213 if( bS[bS_index] == 0 ) {
6214 continue;
6217 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6218 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6219 alpha = alpha_table[index_a];
6220 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6223 if( bS[bS_index] < 4 ) {
6224 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6225 /* 4px edge length */
6226 const int p0 = pix[-1];
6227 const int p1 = pix[-2];
6228 const int p2 = pix[-3];
6229 const int q0 = pix[0];
6230 const int q1 = pix[1];
6231 const int q2 = pix[2];
6233 if( ABS( p0 - q0 ) < alpha &&
6234 ABS( p1 - p0 ) < beta &&
6235 ABS( q1 - q0 ) < beta ) {
6236 int tc = tc0;
6237 int i_delta;
6239 if( ABS( p2 - p0 ) < beta ) {
6240 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6241 tc++;
6243 if( ABS( q2 - q0 ) < beta ) {
6244 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6245 tc++;
6248 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6249 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6250 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6251 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6253 }else{
6254 /* 4px edge length */
6255 const int p0 = pix[-1];
6256 const int p1 = pix[-2];
6257 const int p2 = pix[-3];
6259 const int q0 = pix[0];
6260 const int q1 = pix[1];
6261 const int q2 = pix[2];
6263 if( ABS( p0 - q0 ) < alpha &&
6264 ABS( p1 - p0 ) < beta &&
6265 ABS( q1 - q0 ) < beta ) {
6267 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6268 if( ABS( p2 - p0 ) < beta)
6270 const int p3 = pix[-4];
6271 /* p0', p1', p2' */
6272 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6273 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6274 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6275 } else {
6276 /* p0' */
6277 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6279 if( ABS( q2 - q0 ) < beta)
6281 const int q3 = pix[3];
6282 /* q0', q1', q2' */
6283 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6284 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6285 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6286 } else {
6287 /* q0' */
6288 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6290 }else{
6291 /* p0', q0' */
6292 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6293 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6295 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6300 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6301 int i;
6302 for( i = 0; i < 8; i++, pix += stride) {
6303 int index_a;
6304 int alpha;
6305 int beta;
6307 int qp_index;
6308 int bS_index = i;
6310 if( bS[bS_index] == 0 ) {
6311 continue;
6314 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6315 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6316 alpha = alpha_table[index_a];
6317 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6318 if( bS[bS_index] < 4 ) {
6319 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6320 /* 2px edge length (because we use same bS than the one for luma) */
6321 const int p0 = pix[-1];
6322 const int p1 = pix[-2];
6323 const int q0 = pix[0];
6324 const int q1 = pix[1];
6326 if( ABS( p0 - q0 ) < alpha &&
6327 ABS( p1 - p0 ) < beta &&
6328 ABS( q1 - q0 ) < beta ) {
6329 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6331 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6332 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6333 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6335 }else{
6336 const int p0 = pix[-1];
6337 const int p1 = pix[-2];
6338 const int q0 = pix[0];
6339 const int q1 = pix[1];
6341 if( ABS( p0 - q0 ) < alpha &&
6342 ABS( p1 - p0 ) < beta &&
6343 ABS( q1 - q0 ) < beta ) {
6345 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6346 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6347 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6353 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6354 int i, d;
6355 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6356 const int alpha = alpha_table[index_a];
6357 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6358 const int pix_next = stride;
6360 if( bS[0] < 4 ) {
6361 int8_t tc[4];
6362 for(i=0; i<4; i++)
6363 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6364 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6365 } else {
6366 /* 16px edge length, see filter_mb_edgev */
6367 for( d = 0; d < 16; d++ ) {
6368 const int p0 = pix[-1*pix_next];
6369 const int p1 = pix[-2*pix_next];
6370 const int p2 = pix[-3*pix_next];
6371 const int q0 = pix[0];
6372 const int q1 = pix[1*pix_next];
6373 const int q2 = pix[2*pix_next];
6375 if( ABS( p0 - q0 ) < alpha &&
6376 ABS( p1 - p0 ) < beta &&
6377 ABS( q1 - q0 ) < beta ) {
6379 const int p3 = pix[-4*pix_next];
6380 const int q3 = pix[ 3*pix_next];
6382 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6383 if( ABS( p2 - p0 ) < beta) {
6384 /* p0', p1', p2' */
6385 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6386 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6387 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6388 } else {
6389 /* p0' */
6390 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6392 if( ABS( q2 - q0 ) < beta) {
6393 /* q0', q1', q2' */
6394 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6395 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6396 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6397 } else {
6398 /* q0' */
6399 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6401 }else{
6402 /* p0', q0' */
6403 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6404 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6406 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6408 pix++;
6413 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6414 int i, d;
6415 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6416 const int alpha = alpha_table[index_a];
6417 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6419 if( bS[0] < 4 ) {
6420 int8_t tc[4];
6421 for(i=0; i<4; i++)
6422 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6423 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6424 } else {
6425 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6429 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6430 MpegEncContext * const s = &h->s;
6431 const int mb_xy= mb_x + mb_y*s->mb_stride;
6432 int first_vertical_edge_done = 0;
6433 int dir;
6434 /* FIXME: A given frame may occupy more than one position in
6435 * the reference list. So ref2frm should be populated with
6436 * frame numbers, not indices. */
6437 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6439 if (h->mb_aff_frame
6440 // left mb is in picture
6441 && h->slice_table[mb_xy-1] != 255
6442 // and current and left pair do not have the same interlaced type
6443 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6444 // and left mb is in the same slice if deblocking_filter == 2
6445 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6446 /* First vertical edge is different in MBAFF frames
6447 * There are 8 different bS to compute and 2 different Qp
6449 int bS[8];
6450 int qp[2];
6451 int chroma_qp[2];
6453 int i;
6454 first_vertical_edge_done = 1;
6455 for( i = 0; i < 8; i++ ) {
6456 int y = i>>1;
6457 int b_idx= 8 + 4 + 8*y;
6458 int bn_idx= b_idx - 1;
6460 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6462 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6463 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6464 bS[i] = 4;
6465 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6466 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6467 h->non_zero_count_cache[bn_idx] != 0 ) {
6468 bS[i] = 2;
6469 } else {
6470 int l;
6471 bS[i] = 0;
6472 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6473 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6474 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6475 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6476 bS[i] = 1;
6477 break;
6482 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6483 // Do not use s->qscale as luma quantizer because it has not the same
6484 // value in IPCM macroblocks.
6485 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6486 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6487 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6488 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6489 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6490 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6492 /* Filter edge */
6493 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6494 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6495 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6496 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6497 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6500 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6501 for( dir = 0; dir < 2; dir++ )
6503 int edge;
6504 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6505 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6507 if (first_vertical_edge_done) {
6508 start = 1;
6509 first_vertical_edge_done = 0;
6512 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6513 start = 1;
6515 /* Calculate bS */
6516 for( edge = start; edge < 4; edge++ ) {
6517 /* mbn_xy: neighbor macroblock */
6518 int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6519 int bS[4];
6520 int qp;
6522 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
6523 continue;
6525 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6526 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
6527 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
6529 // This is a special case in the norm where the filtering must
6530 // be done twice (one each of the field) even if we are in a
6531 // frame macroblock.
6533 unsigned int tmp_linesize = 2 * linesize;
6534 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6535 int mbn_xy = mb_xy - 2 * s->mb_stride;
6536 int qp, chroma_qp;
6538 // first filtering
6539 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6540 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6541 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6542 } else {
6543 // TODO
6544 assert(0);
6546 /* Filter edge */
6547 // Do not use s->qscale as luma quantizer because it has not the same
6548 // value in IPCM macroblocks.
6549 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6550 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6551 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6552 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6553 chroma_qp = ( h->chroma_qp +
6554 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6555 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6556 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
6558 // second filtering
6559 mbn_xy += s->mb_stride;
6560 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6561 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6562 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6563 } else {
6564 // TODO
6565 assert(0);
6567 /* Filter edge */
6568 // Do not use s->qscale as luma quantizer because it has not the same
6569 // value in IPCM macroblocks.
6570 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6571 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6572 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6573 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6574 chroma_qp = ( h->chroma_qp +
6575 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6576 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6577 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6578 continue;
6580 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6581 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6582 int value;
6583 if (edge == 0) {
6584 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
6585 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6587 value = 4;
6588 } else {
6589 value = 3;
6591 } else {
6592 value = 3;
6594 bS[0] = bS[1] = bS[2] = bS[3] = value;
6595 } else {
6596 int i;
6597 for( i = 0; i < 4; i++ ) {
6598 int x = dir == 0 ? edge : i;
6599 int y = dir == 0 ? i : edge;
6600 int b_idx= 8 + 4 + x + 8*y;
6601 int bn_idx= b_idx - (dir ? 8:1);
6603 if( h->non_zero_count_cache[b_idx] != 0 ||
6604 h->non_zero_count_cache[bn_idx] != 0 ) {
6605 bS[i] = 2;
6607 else
6609 int l;
6610 bS[i] = 0;
6611 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6612 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6613 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6614 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6615 bS[i] = 1;
6616 break;
6622 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6623 continue;
6626 /* Filter edge */
6627 // Do not use s->qscale as luma quantizer because it has not the same
6628 // value in IPCM macroblocks.
6629 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6630 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6631 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6632 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6633 if( dir == 0 ) {
6634 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6635 if( (edge&1) == 0 ) {
6636 int chroma_qp = ( h->chroma_qp +
6637 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6638 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6639 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6641 } else {
6642 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6643 if( (edge&1) == 0 ) {
6644 int chroma_qp = ( h->chroma_qp +
6645 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6646 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6647 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6654 static int decode_slice(H264Context *h){
6655 MpegEncContext * const s = &h->s;
6656 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6658 s->mb_skip_run= -1;
6660 if( h->pps.cabac ) {
6661 int i;
6663 /* realign */
6664 align_get_bits( &s->gb );
6666 /* init cabac */
6667 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6668 ff_init_cabac_decoder( &h->cabac,
6669 s->gb.buffer + get_bits_count(&s->gb)/8,
6670 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6671 /* calculate pre-state */
6672 for( i= 0; i < 460; i++ ) {
6673 int pre;
6674 if( h->slice_type == I_TYPE )
6675 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6676 else
6677 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6679 if( pre <= 63 )
6680 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6681 else
6682 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6685 for(;;){
6686 int ret = decode_mb_cabac(h);
6687 int eos;
6689 if(ret>=0) hl_decode_mb(h);
6691 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6692 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6693 s->mb_y++;
6695 if(ret>=0) ret = decode_mb_cabac(h);
6697 hl_decode_mb(h);
6698 s->mb_y--;
6700 eos = get_cabac_terminate( &h->cabac );
6702 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6703 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6704 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6705 return -1;
6708 if( ++s->mb_x >= s->mb_width ) {
6709 s->mb_x = 0;
6710 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6711 ++s->mb_y;
6712 if(h->mb_aff_frame) {
6713 ++s->mb_y;
6717 if( eos || s->mb_y >= s->mb_height ) {
6718 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6720 return 0;
6724 } else {
6725 for(;;){
6726 int ret = decode_mb_cavlc(h);
6728 if(ret>=0) hl_decode_mb(h);
6730 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6731 s->mb_y++;
6732 ret = decode_mb_cavlc(h);
6734 if(ret>=0) hl_decode_mb(h);
6735 s->mb_y--;
6738 if(ret<0){
6739 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6740 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6742 return -1;
6745 if(++s->mb_x >= s->mb_width){
6746 s->mb_x=0;
6747 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6748 ++s->mb_y;
6749 if(h->mb_aff_frame) {
6750 ++s->mb_y;
6752 if(s->mb_y >= s->mb_height){
6753 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6755 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6756 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6758 return 0;
6759 }else{
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6762 return -1;
6767 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6768 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6769 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6770 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6772 return 0;
6773 }else{
6774 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6776 return -1;
6782 #if 0
6783 for(;s->mb_y < s->mb_height; s->mb_y++){
6784 for(;s->mb_x < s->mb_width; s->mb_x++){
6785 int ret= decode_mb(h);
6787 hl_decode_mb(h);
6789 if(ret<0){
6790 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6791 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6793 return -1;
6796 if(++s->mb_x >= s->mb_width){
6797 s->mb_x=0;
6798 if(++s->mb_y >= s->mb_height){
6799 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6800 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6802 return 0;
6803 }else{
6804 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6806 return -1;
6811 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6812 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6813 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6815 return 0;
6816 }else{
6817 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6819 return -1;
6823 s->mb_x=0;
6824 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6826 #endif
6827 return -1; //not reached
6830 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6831 MpegEncContext * const s = &h->s;
6832 int cpb_count, i;
6833 cpb_count = get_ue_golomb(&s->gb) + 1;
6834 get_bits(&s->gb, 4); /* bit_rate_scale */
6835 get_bits(&s->gb, 4); /* cpb_size_scale */
6836 for(i=0; i<cpb_count; i++){
6837 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6838 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6839 get_bits1(&s->gb); /* cbr_flag */
6841 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6842 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6843 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6844 get_bits(&s->gb, 5); /* time_offset_length */
6847 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6848 MpegEncContext * const s = &h->s;
6849 int aspect_ratio_info_present_flag, aspect_ratio_idc;
6850 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6852 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6854 if( aspect_ratio_info_present_flag ) {
6855 aspect_ratio_idc= get_bits(&s->gb, 8);
6856 if( aspect_ratio_idc == EXTENDED_SAR ) {
6857 sps->sar.num= get_bits(&s->gb, 16);
6858 sps->sar.den= get_bits(&s->gb, 16);
6859 }else if(aspect_ratio_idc < 16){
6860 sps->sar= pixel_aspect[aspect_ratio_idc];
6861 }else{
6862 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6863 return -1;
6865 }else{
6866 sps->sar.num=
6867 sps->sar.den= 0;
6869 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6871 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6872 get_bits1(&s->gb); /* overscan_appropriate_flag */
6875 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6876 get_bits(&s->gb, 3); /* video_format */
6877 get_bits1(&s->gb); /* video_full_range_flag */
6878 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6879 get_bits(&s->gb, 8); /* colour_primaries */
6880 get_bits(&s->gb, 8); /* transfer_characteristics */
6881 get_bits(&s->gb, 8); /* matrix_coefficients */
6885 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6886 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6887 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6890 sps->timing_info_present_flag = get_bits1(&s->gb);
6891 if(sps->timing_info_present_flag){
6892 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6893 sps->time_scale = get_bits_long(&s->gb, 32);
6894 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6897 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6898 if(nal_hrd_parameters_present_flag)
6899 decode_hrd_parameters(h, sps);
6900 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6901 if(vcl_hrd_parameters_present_flag)
6902 decode_hrd_parameters(h, sps);
6903 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6904 get_bits1(&s->gb); /* low_delay_hrd_flag */
6905 get_bits1(&s->gb); /* pic_struct_present_flag */
6907 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6908 if(sps->bitstream_restriction_flag){
6909 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6910 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6911 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6912 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6913 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6914 sps->num_reorder_frames = get_ue_golomb(&s->gb);
6915 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
6918 return 0;
6921 static inline int decode_seq_parameter_set(H264Context *h){
6922 MpegEncContext * const s = &h->s;
6923 int profile_idc, level_idc;
6924 int sps_id, i;
6925 SPS *sps;
6927 profile_idc= get_bits(&s->gb, 8);
6928 get_bits1(&s->gb); //constraint_set0_flag
6929 get_bits1(&s->gb); //constraint_set1_flag
6930 get_bits1(&s->gb); //constraint_set2_flag
6931 get_bits1(&s->gb); //constraint_set3_flag
6932 get_bits(&s->gb, 4); // reserved
6933 level_idc= get_bits(&s->gb, 8);
6934 sps_id= get_ue_golomb(&s->gb);
6936 sps= &h->sps_buffer[ sps_id ];
6937 sps->profile_idc= profile_idc;
6938 sps->level_idc= level_idc;
6940 if(sps->profile_idc >= 100){ //high profile
6941 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
6942 get_bits1(&s->gb); //residual_color_transform_flag
6943 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6944 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6945 sps->transform_bypass = get_bits1(&s->gb);
6946 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag
6947 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
6948 return -1;
6952 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6953 sps->poc_type= get_ue_golomb(&s->gb);
6955 if(sps->poc_type == 0){ //FIXME #define
6956 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6957 } else if(sps->poc_type == 1){//FIXME #define
6958 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6959 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6960 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6961 sps->poc_cycle_length= get_ue_golomb(&s->gb);
6963 for(i=0; i<sps->poc_cycle_length; i++)
6964 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6966 if(sps->poc_type > 2){
6967 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6968 return -1;
6971 sps->ref_frame_count= get_ue_golomb(&s->gb);
6972 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
6973 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6975 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
6976 sps->mb_width= get_ue_golomb(&s->gb) + 1;
6977 sps->mb_height= get_ue_golomb(&s->gb) + 1;
6978 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
6979 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
6980 return -1;
6982 sps->frame_mbs_only_flag= get_bits1(&s->gb);
6983 if(!sps->frame_mbs_only_flag)
6984 sps->mb_aff= get_bits1(&s->gb);
6985 else
6986 sps->mb_aff= 0;
6988 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
6990 sps->crop= get_bits1(&s->gb);
6991 if(sps->crop){
6992 sps->crop_left = get_ue_golomb(&s->gb);
6993 sps->crop_right = get_ue_golomb(&s->gb);
6994 sps->crop_top = get_ue_golomb(&s->gb);
6995 sps->crop_bottom= get_ue_golomb(&s->gb);
6996 if(sps->crop_left || sps->crop_top){
6997 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
6999 }else{
7000 sps->crop_left =
7001 sps->crop_right =
7002 sps->crop_top =
7003 sps->crop_bottom= 0;
7006 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7007 if( sps->vui_parameters_present_flag )
7008 decode_vui_parameters(h, sps);
7010 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7011 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7012 sps_id, sps->profile_idc, sps->level_idc,
7013 sps->poc_type,
7014 sps->ref_frame_count,
7015 sps->mb_width, sps->mb_height,
7016 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7017 sps->direct_8x8_inference_flag ? "8B8" : "",
7018 sps->crop_left, sps->crop_right,
7019 sps->crop_top, sps->crop_bottom,
7020 sps->vui_parameters_present_flag ? "VUI" : ""
7023 return 0;
7026 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7027 MpegEncContext * const s = &h->s;
7028 int pps_id= get_ue_golomb(&s->gb);
7029 PPS *pps= &h->pps_buffer[pps_id];
7031 pps->sps_id= get_ue_golomb(&s->gb);
7032 pps->cabac= get_bits1(&s->gb);
7033 pps->pic_order_present= get_bits1(&s->gb);
7034 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7035 if(pps->slice_group_count > 1 ){
7036 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7037 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7038 switch(pps->mb_slice_group_map_type){
7039 case 0:
7040 #if 0
7041 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7042 | run_length[ i ] |1 |ue(v) |
7043 #endif
7044 break;
7045 case 2:
7046 #if 0
7047 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7048 |{ | | |
7049 | top_left_mb[ i ] |1 |ue(v) |
7050 | bottom_right_mb[ i ] |1 |ue(v) |
7051 | } | | |
7052 #endif
7053 break;
7054 case 3:
7055 case 4:
7056 case 5:
7057 #if 0
7058 | slice_group_change_direction_flag |1 |u(1) |
7059 | slice_group_change_rate_minus1 |1 |ue(v) |
7060 #endif
7061 break;
7062 case 6:
7063 #if 0
7064 | slice_group_id_cnt_minus1 |1 |ue(v) |
7065 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7066 |) | | |
7067 | slice_group_id[ i ] |1 |u(v) |
7068 #endif
7069 break;
7072 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7073 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7074 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7075 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7076 return -1;
7079 pps->weighted_pred= get_bits1(&s->gb);
7080 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7081 pps->init_qp= get_se_golomb(&s->gb) + 26;
7082 pps->init_qs= get_se_golomb(&s->gb) + 26;
7083 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7084 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7085 pps->constrained_intra_pred= get_bits1(&s->gb);
7086 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7088 if(get_bits_count(&s->gb) < bit_length){
7089 pps->transform_8x8_mode= get_bits1(&s->gb);
7090 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag
7091 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7092 return -1;
7094 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7097 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7098 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7099 pps_id, pps->sps_id,
7100 pps->cabac ? "CABAC" : "CAVLC",
7101 pps->slice_group_count,
7102 pps->ref_count[0], pps->ref_count[1],
7103 pps->weighted_pred ? "weighted" : "",
7104 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7105 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7106 pps->constrained_intra_pred ? "CONSTR" : "",
7107 pps->redundant_pic_cnt_present ? "REDU" : "",
7108 pps->transform_8x8_mode ? "8x8DCT" : ""
7112 return 0;
7116 * finds the end of the current frame in the bitstream.
7117 * @return the position of the first byte of the next frame, or -1
7119 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7120 int i;
7121 uint32_t state;
7122 ParseContext *pc = &(h->s.parse_context);
7123 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7124 // mb_addr= pc->mb_addr - 1;
7125 state= pc->state;
7126 for(i=0; i<=buf_size; i++){
7127 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7128 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7129 if(pc->frame_start_found){
7130 // If there isn't one more byte in the buffer
7131 // the test on first_mb_in_slice cannot be done yet
7132 // do it at next call.
7133 if (i >= buf_size) break;
7134 if (buf[i] & 0x80) {
7135 // first_mb_in_slice is 0, probably the first nal of a new
7136 // slice
7137 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7138 pc->state=-1;
7139 pc->frame_start_found= 0;
7140 return i-4;
7143 pc->frame_start_found = 1;
7145 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7146 if(pc->frame_start_found){
7147 pc->state=-1;
7148 pc->frame_start_found= 0;
7149 return i-4;
7152 if (i<buf_size)
7153 state= (state<<8) | buf[i];
7156 pc->state= state;
7157 return END_NOT_FOUND;
7160 static int h264_parse(AVCodecParserContext *s,
7161 AVCodecContext *avctx,
7162 uint8_t **poutbuf, int *poutbuf_size,
7163 const uint8_t *buf, int buf_size)
7165 H264Context *h = s->priv_data;
7166 ParseContext *pc = &h->s.parse_context;
7167 int next;
7169 next= find_frame_end(h, buf, buf_size);
7171 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7172 *poutbuf = NULL;
7173 *poutbuf_size = 0;
7174 return buf_size;
7177 *poutbuf = (uint8_t *)buf;
7178 *poutbuf_size = buf_size;
7179 return next;
7182 static int h264_split(AVCodecContext *avctx,
7183 const uint8_t *buf, int buf_size)
7185 int i;
7186 uint32_t state = -1;
7187 int has_sps= 0;
7189 for(i=0; i<=buf_size; i++){
7190 if((state&0xFFFFFF1F) == 0x107)
7191 has_sps=1;
7192 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7194 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7195 if(has_sps){
7196 while(i>4 && buf[i-5]==0) i--;
7197 return i-4;
7200 if (i<buf_size)
7201 state= (state<<8) | buf[i];
7203 return 0;
7207 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7208 MpegEncContext * const s = &h->s;
7209 AVCodecContext * const avctx= s->avctx;
7210 int buf_index=0;
7211 #if 0
7212 int i;
7213 for(i=0; i<32; i++){
7214 printf("%X ", buf[i]);
7216 #endif
7217 h->slice_num = 0;
7218 s->current_picture_ptr= NULL;
7219 for(;;){
7220 int consumed;
7221 int dst_length;
7222 int bit_length;
7223 uint8_t *ptr;
7224 int i, nalsize = 0;
7226 if(h->is_avc) {
7227 if(buf_index >= buf_size) break;
7228 nalsize = 0;
7229 for(i = 0; i < h->nal_length_size; i++)
7230 nalsize = (nalsize << 8) | buf[buf_index++];
7231 } else {
7232 // start code prefix search
7233 for(; buf_index + 3 < buf_size; buf_index++){
7234 // this should allways succeed in the first iteration
7235 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7236 break;
7239 if(buf_index+3 >= buf_size) break;
7241 buf_index+=3;
7244 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7245 if(ptr[dst_length - 1] == 0) dst_length--;
7246 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7248 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7249 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7252 if (h->is_avc && (nalsize != consumed))
7253 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7255 buf_index += consumed;
7257 if( (s->hurry_up == 1 && h->nal_ref_idc == 0)
7258 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7259 continue;
7261 switch(h->nal_unit_type){
7262 case NAL_IDR_SLICE:
7263 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7264 case NAL_SLICE:
7265 init_get_bits(&s->gb, ptr, bit_length);
7266 h->intra_gb_ptr=
7267 h->inter_gb_ptr= &s->gb;
7268 s->data_partitioning = 0;
7270 if(decode_slice_header(h) < 0){
7271 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7272 break;
7274 if(h->redundant_pic_count==0 && s->hurry_up < 5
7275 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7276 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7277 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7278 && avctx->skip_frame < AVDISCARD_ALL)
7279 decode_slice(h);
7280 break;
7281 case NAL_DPA:
7282 init_get_bits(&s->gb, ptr, bit_length);
7283 h->intra_gb_ptr=
7284 h->inter_gb_ptr= NULL;
7285 s->data_partitioning = 1;
7287 if(decode_slice_header(h) < 0){
7288 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7290 break;
7291 case NAL_DPB:
7292 init_get_bits(&h->intra_gb, ptr, bit_length);
7293 h->intra_gb_ptr= &h->intra_gb;
7294 break;
7295 case NAL_DPC:
7296 init_get_bits(&h->inter_gb, ptr, bit_length);
7297 h->inter_gb_ptr= &h->inter_gb;
7299 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7300 && s->hurry_up < 5
7301 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7302 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7303 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7304 && avctx->skip_frame < AVDISCARD_ALL)
7305 decode_slice(h);
7306 break;
7307 case NAL_SEI:
7308 break;
7309 case NAL_SPS:
7310 init_get_bits(&s->gb, ptr, bit_length);
7311 decode_seq_parameter_set(h);
7313 if(s->flags& CODEC_FLAG_LOW_DELAY)
7314 s->low_delay=1;
7316 if(avctx->has_b_frames < 2)
7317 avctx->has_b_frames= !s->low_delay;
7318 break;
7319 case NAL_PPS:
7320 init_get_bits(&s->gb, ptr, bit_length);
7322 decode_picture_parameter_set(h, bit_length);
7324 break;
7325 case NAL_PICTURE_DELIMITER:
7326 break;
7327 case NAL_FILTER_DATA:
7328 break;
7329 default:
7330 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7334 if(!s->current_picture_ptr) return buf_index; //no frame
7336 s->current_picture_ptr->pict_type= s->pict_type;
7337 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7339 h->prev_frame_num_offset= h->frame_num_offset;
7340 h->prev_frame_num= h->frame_num;
7341 if(s->current_picture_ptr->reference){
7342 h->prev_poc_msb= h->poc_msb;
7343 h->prev_poc_lsb= h->poc_lsb;
7345 if(s->current_picture_ptr->reference)
7346 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7348 ff_er_frame_end(s);
7350 MPV_frame_end(s);
7352 return buf_index;
7356 * returns the number of bytes consumed for building the current frame
7358 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7359 if(s->flags&CODEC_FLAG_TRUNCATED){
7360 pos -= s->parse_context.last_index;
7361 if(pos<0) pos=0; // FIXME remove (unneeded?)
7363 return pos;
7364 }else{
7365 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7366 if(pos+10>buf_size) pos=buf_size; // oops ;)
7368 return pos;
7372 static int decode_frame(AVCodecContext *avctx,
7373 void *data, int *data_size,
7374 uint8_t *buf, int buf_size)
7376 H264Context *h = avctx->priv_data;
7377 MpegEncContext *s = &h->s;
7378 AVFrame *pict = data;
7379 int buf_index;
7381 s->flags= avctx->flags;
7382 s->flags2= avctx->flags2;
7384 /* no supplementary picture */
7385 if (buf_size == 0) {
7386 return 0;
7389 if(s->flags&CODEC_FLAG_TRUNCATED){
7390 int next= find_frame_end(h, buf, buf_size);
7392 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7393 return buf_size;
7394 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7397 if(h->is_avc && !h->got_avcC) {
7398 int i, cnt, nalsize;
7399 unsigned char *p = avctx->extradata;
7400 if(avctx->extradata_size < 7) {
7401 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7402 return -1;
7404 if(*p != 1) {
7405 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7406 return -1;
7408 /* sps and pps in the avcC always have length coded with 2 bytes,
7409 so put a fake nal_length_size = 2 while parsing them */
7410 h->nal_length_size = 2;
7411 // Decode sps from avcC
7412 cnt = *(p+5) & 0x1f; // Number of sps
7413 p += 6;
7414 for (i = 0; i < cnt; i++) {
7415 nalsize = BE_16(p) + 2;
7416 if(decode_nal_units(h, p, nalsize) != nalsize) {
7417 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7418 return -1;
7420 p += nalsize;
7422 // Decode pps from avcC
7423 cnt = *(p++); // Number of pps
7424 for (i = 0; i < cnt; i++) {
7425 nalsize = BE_16(p) + 2;
7426 if(decode_nal_units(h, p, nalsize) != nalsize) {
7427 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7428 return -1;
7430 p += nalsize;
7432 // Now store right nal length size, that will be use to parse all other nals
7433 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7434 // Do not reparse avcC
7435 h->got_avcC = 1;
7438 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7439 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7440 return -1;
7443 buf_index=decode_nal_units(h, buf, buf_size);
7444 if(buf_index < 0)
7445 return -1;
7447 //FIXME do something with unavailable reference frames
7449 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7450 if(!s->current_picture_ptr){
7451 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7452 return -1;
7456 Picture *out = s->current_picture_ptr;
7457 #if 0 //decode order
7458 *data_size = sizeof(AVFrame);
7459 #else
7460 /* Sort B-frames into display order */
7461 Picture *cur = s->current_picture_ptr;
7462 Picture *prev = h->delayed_output_pic;
7463 int out_idx = 0;
7464 int pics = 0;
7465 int out_of_order;
7466 int cross_idr = 0;
7467 int dropped_frame = 0;
7468 int i;
7470 if(h->sps.bitstream_restriction_flag
7471 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7472 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7473 s->low_delay = 0;
7476 while(h->delayed_pic[pics]) pics++;
7477 h->delayed_pic[pics++] = cur;
7478 if(cur->reference == 0)
7479 cur->reference = 1;
7481 for(i=0; h->delayed_pic[i]; i++)
7482 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7483 cross_idr = 1;
7485 out = h->delayed_pic[0];
7486 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7487 if(h->delayed_pic[i]->poc < out->poc){
7488 out = h->delayed_pic[i];
7489 out_idx = i;
7492 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7493 if(prev && pics <= s->avctx->has_b_frames)
7494 out = prev;
7495 else if((out_of_order && pics-1 == s->avctx->has_b_frames)
7496 || (s->low_delay &&
7497 ((!cross_idr && prev && out->poc > prev->poc + 2)
7498 || cur->pict_type == B_TYPE)))
7500 s->low_delay = 0;
7501 s->avctx->has_b_frames++;
7502 out = prev;
7504 else if(out_of_order)
7505 out = prev;
7507 if(out_of_order || pics > s->avctx->has_b_frames){
7508 dropped_frame = (out != h->delayed_pic[out_idx]);
7509 for(i=out_idx; h->delayed_pic[i]; i++)
7510 h->delayed_pic[i] = h->delayed_pic[i+1];
7513 if(prev == out && !dropped_frame)
7514 *data_size = 0;
7515 else
7516 *data_size = sizeof(AVFrame);
7517 if(prev && prev != out && prev->reference == 1)
7518 prev->reference = 0;
7519 h->delayed_output_pic = out;
7520 #endif
7522 *pict= *(AVFrame*)out;
7525 assert(pict->data[0]);
7526 ff_print_debug_info(s, pict);
7527 //printf("out %d\n", (int)pict->data[0]);
7528 #if 0 //?
7530 /* Return the Picture timestamp as the frame number */
7531 /* we substract 1 because it is added on utils.c */
7532 avctx->frame_number = s->picture_number - 1;
7533 #endif
7534 return get_consumed_bytes(s, buf_index, buf_size);
#if 0
/**
 * Fills h->mb_avail[] for the current macroblock (disabled code).
 *
 * Entries 0..2 are the top-left, top and top-right neighbours and entry 3 the
 * left neighbour; a neighbour counts as available if it lies inside the
 * picture and its slice_table entry equals the current slice number.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy  = s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy = mb_xy - s->mb_stride;

    if (!s->mb_y) {
        /* first row: nothing above */
        h->mb_avail[2] = 0;
        h->mb_avail[1] = 0;
        h->mb_avail[0] = 0;
    } else {
        h->mb_avail[0] = s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1] =                            h->slice_table[top_xy    ] == h->slice_num;
        h->mb_avail[2] = s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;
    }
    h->mb_avail[3] = s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4] = 1; //FIXME move out
    h->mb_avail[5] = 0; //FIXME move out
}
#endif
#if 0 //selftest
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Disabled self test: round-trips unsigned and signed exp-golomb codes,
 * measures the error of the 4x4 (I)DCT and round-trips the NAL escaping.
 *
 * @return 0 on success, -1 if a NAL layer check fails
 */
int main(){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* report the value that was actually written, not the loop index */
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        /* use the DSPContext initialised above; no "s" exists in this scope */
        dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= ABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
#if 0
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
#endif
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        /* random non-zero payload ... */
        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        /* ... with exactly "zeros" bytes forced to zero */
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL");

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("missmatch\n");
            return -1;
        }
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif
7730 static int decode_end(AVCodecContext *avctx)
7732 H264Context *h = avctx->priv_data;
7733 MpegEncContext *s = &h->s;
7735 free_tables(h); //FIXME cleanup init stuff perhaps
7736 MPV_common_end(s);
7738 // memset(h, 0, sizeof(H264Context));
7740 return 0;
7744 AVCodec h264_decoder = {
7745 "h264",
7746 CODEC_TYPE_VIDEO,
7747 CODEC_ID_H264,
7748 sizeof(H264Context),
7749 decode_init,
7750 NULL,
7751 decode_end,
7752 decode_frame,
7753 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7754 .flush= flush_dpb,
7757 AVCodecParser h264_parser = {
7758 { CODEC_ID_H264 },
7759 sizeof(H264Context),
7760 NULL,
7761 h264_parse,
7762 ff_parse_close,
7763 h264_split,
7766 #include "svq3.c"