/*
 * Extracted from the FFmpeg "ordered_chapters" mirror: libavcodec/h264.c
 * (gitweb blob be0abd45a3b07dae1d9c6598ae9e3d7f28a12883)
 */
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "mathops.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
39 #include "cabac.h"
40 #if ARCH_X86
41 #include "x86/h264_i386.h"
42 #endif
44 //#undef NDEBUG
45 #include <assert.h>
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
51 #define DELAYED_PIC_REF 4
/* CAVLC decode tables: one VLC handle per spec table plus its statically
 * allocated backing storage. NOTE(review): the init code that fills these
 * is not visible in this chunk; the sizes presumably match the entry counts
 * used at init time — confirm against the VLC initialization. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;
/* Forward declarations for routines defined later in this file. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
83 static av_always_inline uint32_t pack16to32(int a, int b){
84 #ifdef WORDS_BIGENDIAN
85 return (b&0xFFFF) + (a<<16);
86 #else
87 return (a&0xFFFF) + (b<<16);
88 #endif
/* rem6[q] == q % 6 for quantizer values 0..51 (avoids a runtime modulo). */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
/* div6[q] == q / 6 for quantizer values 0..51 (avoids a runtime division). */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
/* Index tables selecting which sub-blocks of the left neighbour MB act as
 * left context; the variant used depends on the MBAFF frame/field pairing
 * (selected in fill_caches). */
static const int left_block_options[4][8]={
    { 0, 1, 2, 3, 7, 10, 8, 11 },
    { 2, 2, 3, 3, 8, 11, 8, 11 },
    { 0, 0, 1, 1, 7, 10, 7, 10 },
    { 0, 2, 0, 2, 7, 10, 7, 10 },
};
/* Number of bits consumed per lookup in the CAVLC level-code table. */
#define LEVEL_TAB_BITS 8
/* cavlc_level_tab[suffix_length][bit pattern][0..1] — NOTE(review): filled
 * at init time; the initializer is not visible in this chunk. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches (intra prediction modes,
 * non-zero-count, CBP, motion vectors, reference indices, mvd and direct
 * flags) from the already decoded neighbouring macroblocks, including the
 * MBAFF neighbour remapping.
 *
 * NOTE(review): this block was recovered from a mangled extraction that
 * dropped several closing-brace lines; the statement content below is
 * preserved exactly as found, so brace balance should be re-checked against
 * the upstream file.
 *
 * @param h           decoder context
 * @param mb_type     mb_type of the current macroblock
 * @param for_deblock nonzero when filling only what the loop filter needs
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;
    int i;

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* Default (non-MBAFF) neighbour addressing. */
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        /* MBAFF: which MB of a neighbouring pair is adjacent depends on the
         * field/frame coding of both the current and the neighbour pair. */
        const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy = pair_xy - s->mb_stride;
        const int topleft_pair_xy = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                left_block= left_block_options[2 - bottom];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        /* Loop-filter path: only top/left neighbour types are needed.
         * NOTE(review): slice_table values >= 0xFFFF presumably mark MBs
         * outside the picture — confirm against slice_table init. */
        topleft_type = 0;
        topright_type = 0;
        top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values where changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
    }else{
        /* Normal decode path: a neighbour only counts if it belongs to the
         * same slice as the current MB. */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* Sample-availability bitmasks for intra prediction; with
         * constrained_intra_pred only intra-coded neighbours count. */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
            }else{
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
        }else{
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        /* Seed the intra4x4 prediction mode cache from the neighbours. */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(!(top_type & type_mask))
                    pred= -1;
                else{
                    pred= 2;
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;

            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(!(left_type[i] & type_mask))
                        pred= -1;
                    else{
                        pred= 2;
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

/*
 0 . T T. T T T T
 1 L . .L . . . .
 2 L . .L . . . .
 3 . T TL . . . .
 4 L . .L . . . .
 5 L . .. . . . .
*/
    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        /* Unavailable top: 0 for CABAC inter (counts as "no coefficients"),
         * 64 otherwise (the "not available" filler). */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

#if 1
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            h->mv_cache_clean[list]= 0;

            /* Top neighbour MVs and reference indices. */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

            /* Left neighbour MVs and reference indices. */
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            /* Topleft / topright neighbours. */
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type_nos == FF_B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            if(FRAME_MBAFF){
                /* Rescale cached refs/MVs of frame<->field neighbour MBs so
                 * they are expressed in the current MB's coordinate system. */
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    MAP_MVS
#undef MAP_F2F
                }else{
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    MAP_MVS
#undef MAP_F2F
#endif

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
563 static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
567 int i;
569 if(!(h->top_samples_available&0x8000)){
570 for(i=0; i<4; i++){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
572 if(status<0){
573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
574 return -1;
575 } else if(status){
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
581 if((h->left_samples_available&0x8888)!=0x8888){
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
583 for(i=0; i<4; i++){
584 if(!(h->left_samples_available&mask[i])){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 if(status<0){
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
588 return -1;
589 } else if(status){
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
596 return 0;
597 } //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
602 static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
607 if(mode > 6U) {
608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
609 return -1;
612 if(!(h->top_samples_available&0x8000)){
613 mode= top[ mode ];
614 if(mode<0){
615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
620 if((h->left_samples_available&0x8080) != 0x8080){
621 mode= left[ mode ];
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625 if(mode<0){
626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 return -1;
631 return mode;
/**
 * gets the predicted intra4x4 prediction mode.
 */
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
646 else return min;
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/**
 * gets the predicted number of non-zero coefficients.
 * @param n block index
 */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
677 int i= left + top;
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
683 return i&31;
/**
 * Selects the "diagonal" MV predictor for block i: normally the top-right
 * neighbour, falling back to the top-left one when top-right is unavailable.
 * Points *C at the predictor's motion vector and returns its reference
 * index (or LIST_NOT_USED / PART_NOT_AVAILABLE).
 *
 * NOTE(review): recovered from a mangled extraction that dropped closing
 * braces; statement content preserved exactly as found.
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        /* Scratch slot scan8[0]-2 holds a rescaled MV for frame/field
         * neighbour mismatches. */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
                /* SET_DIAG_MV fetches the MV/ref directly from the picture
                 * arrays and rescales the vertical component/ref between
                 * frame and field units. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
#undef SET_DIAG_MV

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        /* Fall back to the top-left neighbour. */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
/**
 * Median motion vector prediction from the left (A), top (B) and
 * diagonal (C) neighbours; single-predictor shortcuts apply when exactly
 * one neighbour shares the target reference index, or when only the left
 * neighbour is available.
 *
 * NOTE(review): recovered from a mangled extraction that dropped closing
 * braces; statement content preserved exactly as found.
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    const int16_t * C;
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

/* mv_cache
  B . . A T T T T
  U . . L . . , .
  U . . L . . . .
  U . . L . . , .
  . . . L . . . .
*/
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        /* Exactly one neighbour uses this reference: copy its MV. */
        if(left_ref==ref){
            *mx= A[0];
            *my= A[1];
        }else if(top_ref==ref){
            *mx= B[0];
            *my= B[1];
        }else{
            *mx= C[0];
            *my= C[1];
    }else{
        /* No neighbour matches: left-only shortcut, otherwise median. */
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
            *mx= A[0];
            *my= A[1];
        }else{
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
806 if(n==0){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
812 if(top_ref == ref){
813 *mx= B[0];
814 *my= B[1];
815 return;
817 }else{
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
823 if(left_ref == ref){
824 *mx= A[0];
825 *my= A[1];
826 return;
830 //RARE
831 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
841 if(n==0){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
847 if(left_ref == ref){
848 *mx= A[0];
849 *my= A[1];
850 return;
852 }else{
853 const int16_t * C;
854 int diagonal_ref;
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
861 *mx= C[0];
862 *my= C[1];
863 return;
867 //RARE
868 pred_motion(h, n, 2, list, ref, mx, my);
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
881 *mx = *my = 0;
882 return;
885 pred_motion(h, 0, 4, 0, 0, mx, my);
887 return;
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
894 return 256;
895 }else{
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
902 static inline void direct_dist_scale_factor(H264Context * const h){
903 MpegEncContext * const s = &h->s;
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
905 const int poc1 = h->ref_list[1][0].poc;
906 int i, field;
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the mapping from the colocated picture's (ref_list[1][0]) reference
 * indices to the current slice's ref_list indices, as needed by temporal
 * direct mode. Entries are matched via the key 4*frame_num + (reference&3).
 *
 * @param map      output table; [0..) holds frame-ref mappings, entries at
 *                 +16 hold the per-field mappings
 * @param list     which reference list of the colocated picture to map
 * @param field    parity of the current field (0 for frame coding)
 * @param colfield parity of the colocated field whose ref tables are read
 * @param mbafi    nonzero when mapping MBAFF field references
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;   // frame reference: mark both field parities as used
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;  // select the field matching rfield

            /* find the current-list entry with the same frame_num/parity key */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
/**
 * Records the current picture's reference counts/pocs (so future pictures
 * can use it as a colocated reference) and, for temporal-direct B slices,
 * builds the colocated-to-list0 mapping tables.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    int sidx= (s->picture_structure&1)^1;      // structure index of the current picture
    int ref1sidx= (ref1->reference&1)^1;       // structure index of the colocated picture

    /* store this picture's ref keys: 4*frame_num + parity bits */
    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
    }

    /* frame coding: both structure slots share the same tables */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the colmap tables are only needed for temporal direct B slices */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
/**
 * Derives motion vectors and reference indices for a B_Direct macroblock
 * (or its direct 8x8 sub-blocks) from the colocated macroblock in
 * ref_list[1][0], using spatial or temporal direct prediction as selected
 * by h->direct_spatial_mv_pred. Results go into h->mv_cache/h->ref_cache;
 * *mb_type and h->sub_mb_type are updated with the inferred partitioning.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* Locate the colocated MB; field/frame coding of the current and the
     * colocated picture may differ, which changes mb_xy and the strides
     * used to read the colocated motion data. */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
            /* frame MB, field colocated: pick the colocated field with the
             * poc closest to the current poc */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        }
        goto single_col;
    }else{ // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }else{ // AFR/FR -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }
    }

    /* pointers into the colocated picture's motion/ref data */
    l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){
        /* frame MB reading a field colocated: bottom MB of the pair reads
         * the second row of the colocated field data */
        if(s->mb_y&1){
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0 += 2*b4_stride;
            l1mv1 += 2*b4_stride;
        }
    }

    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare makes PART_NOT_AVAILABLE (negative) largest */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        /* drop the unused prediction direction from the mb/sub-mb types */
        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;
        }

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* field/frame mismatch: evaluate col_zero per 8x8 block */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                if(!IS_INTRA(mb_type_col[y8])
                   && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            }
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                  && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
        }
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16;   // colocated field refs are stored at +16

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;   // rescale vertical MV between field/frame units
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);   // l1 MV = l0 MV - colocated MV
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
                }
            }
        }
    }
}
/**
 * Copies the per-macroblock motion data from the decode caches
 * (mv_cache/ref_cache/mvd_cache) back into the current picture's
 * motion_val/ref_index tables (and the CABAC mvd/direct tables).
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;    // 4x4-block coordinates
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;   // 8x8-block coordinates
    int list;

    /* list0 unused: mark the ref indices so the deblocker/pred sees it */
    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* copy 4 MVs (8 bytes) per row out of the 8-wide cache layout */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        if( h->pps.cabac ) {
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
        }

        {
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
        }
    }

    /* record which 8x8 blocks used direct mode (CABAC context for B slices) */
    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
        }
    }
}
/**
 * Decodes a network abstraction layer unit.
 * Strips the one-byte NAL header, scans for the next start code or the
 * end of the buffer, and removes 00 00 03 emulation-prevention escapes.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

    /* fast scan for a 00 byte (candidate escape or start code); the
     * unaligned variants test 4/8 bytes at a time for any zero byte */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;   // rewind the block-scan overshoot before continuing
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    memcpy(dst, src, i);   // everything before the first escape is verbatim
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            /* no escape possible in the next two bytes, copy them in one go */
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
1460 * identifies the exact end of the bitstream
1461 * @return the length of the trailing, or 0 if damaged
1463 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1464 int v= *src;
1465 int r;
1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1469 for(r=1; r<9; r++){
1470 if(v&1) return r;
1471 v>>=1;
1473 return 0;
1477 * IDCT transforms the 16 dc values and dequantizes them.
1478 * @param qp quantization parameter
1480 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1481 #define stride 16
1482 int i;
1483 int temp[16]; //FIXME check if this is a good idea
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1487 //memset(block, 64, 2*256);
1488 //return;
1489 for(i=0; i<4; i++){
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
1496 temp[4*i+0]= z0+z3;
1497 temp[4*i+1]= z1+z2;
1498 temp[4*i+2]= z1-z2;
1499 temp[4*i+3]= z0-z3;
1502 for(i=0; i<4; i++){
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
#if 0
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
/* NOTE(review): dead code, compiled out by #if 0; forward transform kept
 * only as reference for the inverse above. */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1556 #undef xStride
1557 #undef stride
1559 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1560 const int stride= 16*2;
1561 const int xStride= 16;
1562 int a,b,c,d,e;
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
1569 e= a-b;
1570 a= a+b;
1571 b= c-d;
1572 c= c+d;
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/* NOTE(review): dead code, compiled out by #if 0; forward 2x2 transform
 * kept only as reference for the inverse above. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
/**
 * gets the chroma qp.
 * @param t       chroma plane index (0 = cb table, 1 = cr table)
 * @param qscale  luma quantizer to map through the PPS chroma QP table
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
}
/**
 * Motion compensates one prediction direction of one partition from
 * picture pic: luma through qpix_op, chroma through chroma_op, with
 * edge emulation when the MV points outside the reference picture.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;   // quarter-pel position
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);                           // sub-pel phase selects the qpel function
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads 3 extra pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
        src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        /* non-square partition: second half at +delta */
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
1668 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1681 x_offset += 8*s->mb_x;
1682 y_offset += 8*(s->mb_y >> MB_FIELD);
1684 if(list0){
1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
1690 qpix_op= qpix_avg;
1691 chroma_op= chroma_avg;
1694 if(list1){
1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition. Bi-predicted blocks
 * render each direction separately (list1 into a scratch buffer) and
 * blend with implicit or explicit bi-weights; uni-predicted blocks apply
 * the explicit weight/offset in place.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    /* advance the destination to this partition; translate the source
     * offsets to absolute picture coordinates */
    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: table lookup per reference pair */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting: per-list weights and offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        }
    }else{
        /* uni-prediction: render then weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
        }
    }
}
/* Motion-compensate one partition of the macroblock.
 * Routes to the weighted-prediction path when explicit weighting is in use
 * (use_weight==1), or when implicit bi-prediction (use_weight==2) has a
 * non-trivial weight pair; an implicit weight of 32 means a plain 32/32
 * average, which the standard (unweighted) path computes identically. */
1769 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771                            int x_offset, int y_offset,
1772                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1774                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1775                            int list0, int list1){
1776     if((h->use_weight==2 && list0 && list1
1777         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778        || h->use_weight==1)
1779         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780                          x_offset, y_offset, qpix_put, chroma_put,
1781                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1782     else
1783         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1787 static inline void prefetch_motion(H264Context *h, int list){
1788     /* fetch pixels for estimated mv 4 macroblocks ahead
1789      * optimized for 64byte cache lines */
1790     MpegEncContext * const s = &h->s;
1791     const int refn = h->ref_cache[list][scan8[0]];
     /* refn < 0 means no reference for this list (nothing to prefetch) */
1792     if(refn >= 0){
1793         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795         uint8_t **src= h->ref_list[list][refn].data;
1796         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
         /* prefetch luma, then both chroma planes (cb/cr assumed contiguous:
          * the distance src[2]-src[1] is used as the stride between them) */
1797         s->dsp.prefetch(src[0]+off, s->linesize, 4);
1798         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Perform inter prediction (motion compensation) for the whole macroblock.
 * Dispatches on the macroblock partition type (16x16, 16x8, 8x16 or 8x8,
 * the last optionally sub-partitioned per 8x8 block) and calls mc_part()
 * once per partition with the matching put/avg/weight function tables.
 * Prefetches list-0 reference pixels before and list-1 after the work. */
1803 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1805                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1807     MpegEncContext * const s = &h->s;
1808     const int mb_xy= h->mb_xy;
1809     const int mb_type= s->current_picture.mb_type[mb_xy];
1811     assert(IS_INTER(mb_type));
1813     prefetch_motion(h, 0);
1815     if(IS_16X16(mb_type)){
1816         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1818                 &weight_op[0], &weight_avg[0],
1819                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1820     }else if(IS_16X8(mb_type)){
1821         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1823                 &weight_op[1], &weight_avg[1],
1824                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1827                 &weight_op[1], &weight_avg[1],
1828                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829     }else if(IS_8X16(mb_type)){
1830         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1831                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832                 &weight_op[2], &weight_avg[2],
1833                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1834         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1835                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1836                 &weight_op[2], &weight_avg[2],
1837                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1838     }else{
1839         int i;
1841         assert(IS_8X8(mb_type));
1843         for(i=0; i<4; i++){
1844             const int sub_mb_type= h->sub_mb_type[i];
1845             const int n= 4*i;
             /* pixel offset of this 8x8 block inside the macroblock */
1846             int x_offset= (i&1)<<2;
1847             int y_offset= (i&2)<<1;
1849             if(IS_SUB_8X8(sub_mb_type)){
1850                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1852                     &weight_op[3], &weight_avg[3],
1853                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854             }else if(IS_SUB_8X4(sub_mb_type)){
1855                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1857                     &weight_op[4], &weight_avg[4],
1858                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1861                     &weight_op[4], &weight_avg[4],
1862                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863             }else if(IS_SUB_4X8(sub_mb_type)){
1864                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1865                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1866                     &weight_op[5], &weight_avg[5],
1867                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1868                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1869                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870                     &weight_op[5], &weight_avg[5],
1871                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1872             }else{
1873                 int j;
1874                 assert(IS_SUB_4X4(sub_mb_type));
1875                 for(j=0; j<4; j++){
1876                     int sub_x_offset= x_offset + 2*(j&1);
1877                     int sub_y_offset= y_offset +   (j&2);
1878                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1880                         &weight_op[6], &weight_avg[6],
1881                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887     prefetch_motion(h, 1);
/* Build the CAVLC level decoding lookup table: for every suffix length and
 * every LEVEL_TAB_BITS-wide bit pattern, precompute the decoded signed level
 * ([0]) and the number of bits consumed ([1]).  Entries whose codeword does
 * not fit in LEVEL_TAB_BITS store an escape value (prefix+100 / bits so far)
 * so the bitstream reader can fall back to the slow path. */
1890 static av_cold void init_cavlc_level_tab(void){
1891     int suffix_length, mask;
1892     unsigned int i;
1894     for(suffix_length=0; suffix_length<7; suffix_length++){
1895         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1896             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
             /* map unsigned level_code to signed level: even -> positive, odd -> negative */
1899             mask= -(level_code&1);
1900             level_code= (((2+level_code)>>1) ^ mask) - mask;
1901             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902                 cavlc_level_tab[suffix_length][i][0]= level_code;
1903                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1904             }else if(prefix + 1 <= LEVEL_TAB_BITS){
1905                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1907             }else{
1908                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables (coeff_token,
 * total_zeros, run_before, etc.) into preallocated static storage, then
 * builds the level lookup table.  Guarded by a 'done' flag; NOTE(review):
 * the guard is not thread-safe — presumably callers serialize init. */
1915 static av_cold void decode_init_vlc(void){
1916     static int done = 0;
1918     if (!done) {
1919         int i;
1920         int offset;
1921         done = 1;
1923         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1925         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1926                  &chroma_dc_coeff_token_len [0], 1, 1,
1927                  &chroma_dc_coeff_token_bits[0], 1, 1,
1928                  INIT_VLC_USE_NEW_STATIC);
         /* the four coeff_token tables share one packed static array */
1930         offset = 0;
1931         for(i=0; i<4; i++){
1932             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1933             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1934             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1935                      &coeff_token_len [i][0], 1, 1,
1936                      &coeff_token_bits[i][0], 1, 1,
1937                      INIT_VLC_USE_NEW_STATIC);
1938             offset += coeff_token_vlc_tables_size[i];
1941          * This is a one time safety check to make sure that
1942          * the packed static coeff_token_vlc table sizes
1943          * were initialized correctly.
1945         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1947         for(i=0; i<3; i++){
1948             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950             init_vlc(&chroma_dc_total_zeros_vlc[i],
1951                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1953                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954                      INIT_VLC_USE_NEW_STATIC);
1956         for(i=0; i<15; i++){
1957             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959             init_vlc(&total_zeros_vlc[i],
1960                      TOTAL_ZEROS_VLC_BITS, 16,
1961                      &total_zeros_len [i][0], 1, 1,
1962                      &total_zeros_bits[i][0], 1, 1,
1963                      INIT_VLC_USE_NEW_STATIC);
1966         for(i=0; i<6; i++){
1967             run_vlc[i].table = run_vlc_tables[i];
1968             run_vlc[i].table_allocated = run_vlc_tables_size;
1969             init_vlc(&run_vlc[i],
1970                      RUN_VLC_BITS, 7,
1971                      &run_len [i][0], 1, 1,
1972                      &run_bits[i][0], 1, 1,
1973                      INIT_VLC_USE_NEW_STATIC);
1975         run7_vlc.table = run7_vlc_table,
1976         run7_vlc.table_allocated = run7_vlc_table_size;
1977         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1978                  &run_len [6][0], 1, 1,
1979                  &run_bits[6][0], 1, 1,
1980                  INIT_VLC_USE_NEW_STATIC);
1982         init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread borders and scratchpads of every slice-thread
 * context.  av_freep() NULLs each pointer, so double calls are safe. */
1986 static void free_tables(H264Context *h){
1987     int i;
1988     H264Context *hx;
1989     av_freep(&h->intra4x4_pred_mode);
1990     av_freep(&h->chroma_pred_mode_table);
1991     av_freep(&h->cbp_table);
1992     av_freep(&h->mvd_table[0]);
1993     av_freep(&h->mvd_table[1]);
1994     av_freep(&h->direct_table);
1995     av_freep(&h->non_zero_count);
1996     av_freep(&h->slice_table_base);
     /* slice_table points into slice_table_base; clear after freeing the base */
1997     h->slice_table= NULL;
1999     av_freep(&h->mb2b_xy);
2000     av_freep(&h->mb2b8_xy);
2002     for(i = 0; i < h->s.avctx->thread_count; i++) {
2003         hx = h->thread_context[i];
2004         if(!hx) continue;
2005         av_freep(&hx->top_borders[1]);
2006         av_freep(&hx->top_borders[0]);
2007         av_freep(&hx->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization coefficient tables for all 52 QP values,
 * one table per scaling-matrix (intra/inter).  If both scaling matrices are
 * identical the second table aliases the first to save the work. */
2011 static void init_dequant8_coeff_table(H264Context *h){
2012     int i,q,x;
     /* transpose the layout when a non-reference (asm) IDCT is in use */
2013     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2014     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2017     for(i=0; i<2; i++ ){
2018         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2020             break;
2023         for(q=0; q<52; q++){
             /* qp = 6*shift + idx; coefficients scale by 2^shift per 6 QP steps */
2024             int shift = div6[q];
2025             int idx = rem6[q];
2026             for(x=0; x<64; x++)
2027                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029                     h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization coefficient tables for all 52 QP values,
 * one table per scaling matrix (6 of them).  Tables whose scaling matrix
 * duplicates an earlier one alias that earlier buffer instead of recomputing. */
2034 static void init_dequant4_coeff_table(H264Context *h){
2035     int i,j,q,x;
     /* transpose the layout when a non-reference (asm) IDCT is in use */
2036     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2037     for(i=0; i<6; i++ ){
2038         h->dequant4_coeff[i] = h->dequant4_buffer[i];
2039         for(j=0; j<i; j++){
2040             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2042                 break;
2045         if(j<i)
2046             continue;
2048         for(q=0; q<52; q++){
2049             int shift = div6[q] + 2;
2050             int idx = rem6[q];
2051             for(x=0; x<16; x++)
2052                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2054                     h->pps.scaling_matrix4[i][x]) << shift;
/* Initialize all dequantization tables (4x4 always, 8x8 only if the PPS
 * enables 8x8 transforms).  With lossless transform bypass, QP 0 entries
 * are forced to the identity scale (1<<6). */
2059 static void init_dequant_tables(H264Context *h){
2060     int i,x;
2061     init_dequant4_coeff_table(h);
2062     if(h->pps.transform_8x8_mode)
2063         init_dequant8_coeff_table(h);
2064     if(h->sps.transform_bypass){
2065         for(i=0; i<6; i++)
2066             for(x=0; x<16; x++)
2067                 h->dequant4_coeff[i][0][x] = 1<<6;
2068         if(h->pps.transform_8x8_mode)
2069             for(i=0; i<2; i++)
2070                 for(x=0; x<64; x++)
2071                     h->dequant8_coeff[i][0][x] = 1<<6;
 * allocates tables.
 * needs width/height
 * Returns 0 on success, -1 on allocation failure (all partially allocated
 * tables are released via free_tables()).
2080 static int alloc_tables(H264Context *h){
2081     MpegEncContext * const s = &h->s;
     /* +1 row of macroblocks as border for neighbour accesses */
2082     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2083     int x,y;
2085     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2087     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2088     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2089     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2091     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     /* -1 marks "no slice"; slice_table is offset into the base so that
      * out-of-frame neighbour lookups land on valid (-1) entries */
2096     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2097     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2099     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2100     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2101     for(y=0; y<s->mb_height; y++){
2102         for(x=0; x<s->mb_width; x++){
2103             const int mb_xy= x + y*s->mb_stride;
2104             const int b_xy = 4*x + 4*y*h->b_stride;
2105             const int b8_xy= 2*x + 2*y*h->b8_stride;
2107             h->mb2b_xy [mb_xy]= b_xy;
2108             h->mb2b8_xy[mb_xy]= b8_xy;
     /* allocated lazily in frame_start() once linesize is known */
2112     s->obmc_scratchpad = NULL;
2114     if(!h->dequant4_coeff[0])
2115         init_dequant_tables(h);
2117     return 0;
2118 fail:
2119     free_tables(h);
2120     return -1;
 * Mimic alloc_tables(), but for every context thread.
 * All large tables are shared (pointer copies) with the source context;
 * only the per-thread scratchpad is reset so each thread allocates its own.
2126 static void clone_tables(H264Context *dst, H264Context *src){
2127     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2128     dst->non_zero_count           = src->non_zero_count;
2129     dst->slice_table              = src->slice_table;
2130     dst->cbp_table                = src->cbp_table;
2131     dst->mb2b_xy                  = src->mb2b_xy;
2132     dst->mb2b8_xy                 = src->mb2b8_xy;
2133     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2134     dst->mvd_table[0]             = src->mvd_table[0];
2135     dst->mvd_table[1]             = src->mvd_table[1];
2136     dst->direct_table             = src->direct_table;
2138     dst->s.obmc_scratchpad = NULL;
2139     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
 * Init context
 * Allocate buffers which are not shared amongst multiple threads.
 * Returns 0 on success, -1 on allocation failure.
2146 static int context_init(H264Context *h){
     /* 16+8+8 bytes per macroblock column: one luma row plus two chroma rows */
2147     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2150     return 0;
2151 fail:
2152     return -1; // free_tables will clean up for us
/* Shared decoder setup: copies dimensions from the AVCodecContext, installs
 * prediction and DSP function pointers, and seeds the PPS scaling matrices
 * with the flat default (all 16). */
2155 static av_cold void common_init(H264Context *h){
2156     MpegEncContext * const s = &h->s;
2158     s->width = s->avctx->width;
2159     s->height = s->avctx->height;
2160     s->codec_id= s->avctx->codec->id;
2162     ff_h264_pred_init(&h->hpc, s->codec_id);
2164     h->dequant_coeff_pps= -1;
2165     s->unrestricted_mv=1;
2166     s->decode=1; //FIXME
2168     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2170     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the output
 * pixel format (SVQ3 / VDPAU / plain YUV420P), build the static VLC tables
 * and detect AVC (length-prefixed) versus Annex-B extradata. Returns 0. */
2174 static av_cold int decode_init(AVCodecContext *avctx){
2175     H264Context *h= avctx->priv_data;
2176     MpegEncContext * const s = &h->s;
2178     MPV_decode_defaults(s);
2180     s->avctx = avctx;
2181     common_init(h);
2183     s->out_format = FMT_H264;
2184     s->workaround_bugs= avctx->workaround_bugs;
2186     // set defaults
2187 //    s->decode_mb= ff_h263_decode_mb;
2188     s->quarter_sample = 1;
2189     s->low_delay= 1;
2191     if(avctx->codec_id == CODEC_ID_SVQ3)
2192         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2193     else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2195     else
2196         avctx->pix_fmt= PIX_FMT_YUV420P;
2198     decode_init_vlc();
     /* a first extradata byte of 1 identifies an avcC (MP4-style) header */
2200     if(avctx->extradata_size > 0 && avctx->extradata &&
2201        *(char *)avctx->extradata == 1){
2202         h->is_avc = 1;
2203         h->got_avcC = 0;
2204     } else {
2205         h->is_avc = 0;
2208     h->thread_context[0] = h;
2209     h->outputed_poc = INT_MIN;
     /* force a POC discontinuity on the first decoded picture */
2210     h->prev_poc_msb= 1<<16;
2211     return 0;
/* Per-frame setup: start the MPV frame and error resilience, compute the
 * per-block destination offsets from the (now known) linesizes, lazily
 * allocate the bi-pred scratchpad for every slice thread, and reset
 * per-picture state (key_frame, reference, POC).
 * Returns 0 on success, -1 if MPV_frame_start fails. */
2214 static int frame_start(H264Context *h){
2215     MpegEncContext * const s = &h->s;
2216     int i;
2218     if(MPV_frame_start(s, s->avctx) < 0)
2219         return -1;
2220     ff_er_frame_start(s);
2222      * MPV_frame_start uses pict_type to derive key_frame.
2223      * This is incorrect for H.264; IDR markings must be used.
2224      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2225      * See decode_nal_units().
2227     s->current_picture_ptr->key_frame= 0;
2229     assert(s->linesize && s->uvlinesize);
     /* block_offset[0..23] = progressive offsets, [24..47] = field (doubled
      * stride) offsets; 0-15 luma, 16-23 chroma */
2231     for(i=0; i<16; i++){
2232         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2233         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2235     for(i=0; i<4; i++){
2236         h->block_offset[16+i]=
2237         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2238         h->block_offset[24+16+i]=
2239         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2242     /* can't be in alloc_tables because linesize isn't known there.
2243      * FIXME: redo bipred weight to not require extra buffer? */
2244     for(i = 0; i < s->avctx->thread_count; i++)
2245         if(!h->thread_context[i]->s.obmc_scratchpad)
2246             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2248     /* some macroblocks will be accessed before they're available */
2249     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2250         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2252 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2254     // We mark the current picture as non-reference after allocating it, so
2255     // that if we break out due to an error it can be released automatically
2256     // in the next MPV_frame_start().
2257     // SVQ3 as well as most other codecs have only last/next/current and thus
2258     // get released even with set reference, besides SVQ3 and others do not
2259     // mark frames as reference later "naturally".
2260     if(s->codec_id != CODEC_ID_SVQ3)
2261         s->current_picture_ptr->reference= 0;
2263     s->current_picture_ptr->field_poc[0]=
2264     s->current_picture_ptr->field_poc[1]= INT_MAX;
2265     assert(s->current_picture_ptr->long_ref==0);
2267     return 0;
/* Save the right and bottom edges of the just-decoded macroblock into
 * left_border / top_borders so the deblocking filter of the neighbouring
 * macroblocks can still read the pre-filter pixels.  MBAFF frames keep two
 * top lines (one per field) and interleave left_border with 'step'. */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271     MpegEncContext * const s = &h->s;
2272     int i;
2273     int step    = 1;
2274     int offset  = 1;
2275     int uvoffset= 1;
2276     int top_idx = 1;
2277     int skiplast= 0;
2279     src_y  -=   linesize;
2280     src_cb -= uvlinesize;
2281     src_cr -= uvlinesize;
2283     if(!simple && FRAME_MBAFF){
2284         if(s->mb_y&1){
2285             offset = MB_MBAFF ? 1 : 17;
2286             uvoffset= MB_MBAFF ? 1 : 9;
2287             if(!MB_MBAFF){
2288                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2289                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2290                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2291                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2292                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2295         }else{
2296             if(!MB_MBAFF){
2297                 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2299                     h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2300                     h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2302                 skiplast= 1;
2304             offset  =
2305             uvoffset=
2306             top_idx = MB_MBAFF ? 0 : 1;
2308         step= MB_MBAFF ? 2 : 1;
2311     // There are two lines saved, the line above the top macroblock of a pair,
2312     // and the line above the bottom macroblock
2313     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2314     for(i=1; i<17 - skiplast; i++){
2315         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2318     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2319     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2321     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2323         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2324         for(i=1; i<9 - skiplast; i++){
2325             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2326             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2328         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2329         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (or copy, depending on 'xchg') the saved left/top border pixels with
 * the current macroblock's edge pixels.  Used around intra prediction so it
 * reads unfiltered neighbours while deblocking is enabled; called once with
 * xchg=1 before prediction and xchg=0 after to restore. */
2333 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2334     MpegEncContext * const s = &h->s;
2335     int temp8, i;
2336     uint64_t temp64;
2337     int deblock_left;
2338     int deblock_top;
2339     int mb_xy;
2340     int step    = 1;
2341     int offset  = 1;
2342     int uvoffset= 1;
2343     int top_idx = 1;
2345     if(!simple && FRAME_MBAFF){
2346         if(s->mb_y&1){
2347             offset = MB_MBAFF ? 1 : 17;
2348             uvoffset= MB_MBAFF ? 1 : 9;
2349         }else{
2350             offset  =
2351             uvoffset=
2352             top_idx = MB_MBAFF ? 0 : 1;
2354         step= MB_MBAFF ? 2 : 1;
     /* deblocking_filter==2: only filter inside the current slice */
2357     if(h->deblocking_filter == 2) {
2358         mb_xy = h->mb_xy;
2359         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2360         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2361     } else {
2362         deblock_left = (s->mb_x > 0);
2363         deblock_top =  (s->mb_y > !!MB_FIELD);
2366     src_y  -=   linesize + 1;
2367     src_cb -= uvlinesize + 1;
2368     src_cr -= uvlinesize + 1;
2370 #define XCHG(a,b,t,xchg)\
2371 t= a;\
2372 if(xchg)\
2373     a= b;\
2374 b= t;
2376     if(deblock_left){
2377         for(i = !deblock_top; i<16; i++){
2378             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2380         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2383     if(deblock_top){
2384         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2385         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2386         if(s->mb_x+1 < s->mb_width){
2387             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2391     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2392         if(deblock_left){
2393             for(i = !deblock_top; i<8; i++){
2394                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2395                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2397             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2398             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2400         if(deblock_top){
2401             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2402             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction or
 * motion compensation, residual IDCT-add for luma and chroma, then border
 * backup and in-loop deblocking.  'simple' (compile-time constant via
 * av_always_inline) removes the MBAFF / PCM / gray / SVQ3 special cases so
 * the common progressive H.264 path compiles to lean code. */
2407 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2408     MpegEncContext * const s = &h->s;
2409     const int mb_x= s->mb_x;
2410     const int mb_y= s->mb_y;
2411     const int mb_xy= h->mb_xy;
2412     const int mb_type= s->current_picture.mb_type[mb_xy];
2413     uint8_t  *dest_y, *dest_cb, *dest_cr;
2414     int linesize, uvlinesize /*dct_offset*/;
2415     int i;
2416     int *block_offset = &h->block_offset[0];
2417     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2418     /* is_h264 should always be true if SVQ3 is disabled. */
2419     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2420     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2421     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2423     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2424     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2425     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2427     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2428     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     /* field macroblock: double the strides and use the field block offsets */
2430     if (!simple && MB_FIELD) {
2431         linesize   = h->mb_linesize   = s->linesize * 2;
2432         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2433         block_offset = &h->block_offset[24];
2434         if(mb_y&1){ //FIXME move out of this function?
2435             dest_y -= s->linesize*15;
2436             dest_cb-= s->uvlinesize*7;
2437             dest_cr-= s->uvlinesize*7;
2439         if(FRAME_MBAFF) {
2440             int list;
2441             for(list=0; list<h->list_count; list++){
2442                 if(!USES_LIST(mb_type, list))
2443                     continue;
2444                 if(IS_16X16(mb_type)){
2445                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2446                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2447                 }else{
2448                     for(i=0; i<16; i+=4){
2449                         int ref = h->ref_cache[list][scan8[i]];
2450                         if(ref >= 0)
2451                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2456     } else {
2457         linesize   = h->mb_linesize   = s->linesize;
2458         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2459 //        dct_offset = s->linesize * 16;
     /* IPCM macroblock: raw samples were stored in h->mb, just copy them out */
2462     if (!simple && IS_INTRA_PCM(mb_type)) {
2463         for (i=0; i<16; i++) {
2464             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2466         for (i=0; i<8; i++) {
2467             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2468             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2470     } else {
2471         if(IS_INTRA(mb_type)){
             /* expose unfiltered neighbour pixels for intra prediction */
2472             if(h->deblocking_filter)
2473                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2475             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2476                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2477                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2480             if(IS_INTRA4x4(mb_type)){
2481                 if(simple || !s->encoding){
2482                     if(IS_8x8DCT(mb_type)){
2483                         if(transform_bypass){
2484                             idct_dc_add =
2485                             idct_add    = s->dsp.add_pixels8;
2486                         }else{
2487                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2488                             idct_add    = s->dsp.h264_idct8_add;
2490                         for(i=0; i<16; i+=4){
2491                             uint8_t * const ptr= dest_y + block_offset[i];
2492                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2493                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2494                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2495                             }else{
2496                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2497                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2498                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2499                                 if(nnz){
2500                                     if(nnz == 1 && h->mb[i*16])
2501                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2502                                     else
2503                                         idct_add   (ptr, h->mb + i*16, linesize);
2507                 }else{
2508                     if(transform_bypass){
2509                         idct_dc_add =
2510                         idct_add    = s->dsp.add_pixels4;
2511                     }else{
2512                         idct_dc_add = s->dsp.h264_idct_dc_add;
2513                         idct_add    = s->dsp.h264_idct_add;
2515                     for(i=0; i<16; i++){
2516                         uint8_t * const ptr= dest_y + block_offset[i];
2517                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2519                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2520                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2521                         }else{
2522                             uint8_t *topright;
2523                             int nnz, tr;
2524                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2525                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2526                                 assert(mb_y || linesize <= block_offset[i]);
2527                                 if(!topright_avail){
                                 /* replicate the last available top pixel as top-right */
2528                                     tr= ptr[3 - linesize]*0x01010101;
2529                                     topright= (uint8_t*) &tr;
2530                                 }else
2531                                     topright= ptr + 4 - linesize;
2532                             }else
2533                                 topright= NULL;
2535                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2536                             nnz = h->non_zero_count_cache[ scan8[i] ];
2537                             if(nnz){
2538                                 if(is_h264){
2539                                     if(nnz == 1 && h->mb[i*16])
2540                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2541                                     else
2542                                         idct_add   (ptr, h->mb + i*16, linesize);
2543                                 }else
2544                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2550             }else{
2551                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2552                 if(is_h264){
2553                     if(!transform_bypass)
2554                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2555                 }else
2556                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
             /* restore the deblocked border pixels swapped in above */
2558             if(h->deblocking_filter)
2559                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2560         }else if(is_h264){
2561             hl_motion(h, dest_y, dest_cb, dest_cr,
2562                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2563                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2564                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
         /* add the luma residual (intra16x16 / inter paths) */
2568         if(!IS_INTRA4x4(mb_type)){
2569             if(is_h264){
2570                 if(IS_INTRA16x16(mb_type)){
2571                     if(transform_bypass){
2572                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2573                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2574                         }else{
2575                             for(i=0; i<16; i++){
2576                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2577                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2580                     }else{
2581                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2583                 }else if(h->cbp&15){
2584                     if(transform_bypass){
2585                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2586                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2587                         for(i=0; i<16; i+=di){
2588                             if(h->non_zero_count_cache[ scan8[i] ]){
2589                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2592                     }else{
2593                         if(IS_8x8DCT(mb_type)){
2594                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2595                         }else{
2596                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2600             }else{
2601                 for(i=0; i<16; i++){
2602                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2603                         uint8_t * const ptr= dest_y + block_offset[i];
2604                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
         /* add the chroma residual when cbp signals chroma coefficients */
2610         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2611             uint8_t *dest[2] = {dest_cb, dest_cr};
2612             if(transform_bypass){
2613                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2614                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2615                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2616                 }else{
2617                     idct_add = s->dsp.add_pixels4;
2618                     for(i=16; i<16+8; i++){
2619                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2620                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2623             }else{
2624                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2625                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2626                 if(is_h264){
2627                     idct_add = s->dsp.h264_idct_add;
2628                     idct_dc_add = s->dsp.h264_idct_dc_add;
2629                     for(i=16; i<16+8; i++){
2630                         if(h->non_zero_count_cache[ scan8[i] ])
2631                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2632                         else if(h->mb[i*16])
2633                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635                 }else{
2636                     for(i=16; i<16+8; i++){
2637                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2638                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2639                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2646     if(h->cbp || IS_INTRA(mb_type))
2647         s->dsp.clear_blocks(h->mb);
2649     if(h->deblocking_filter) {
2650         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2651         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2652         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2653         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2654         if (!simple && FRAME_MBAFF) {
2655             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2656         } else {
2657             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * (simple=1 lets the always-inlined worker drop MBAFF/PCM/gray/SVQ3 paths.)
2665 static void hl_decode_mb_simple(H264Context *h){
2666     hl_decode_mb_internal(h, 1);
 * Process a macroblock; this handles edge cases, such as interlacing.
 * (noinline so the rarely-taken full path does not bloat the caller.)
2672 static void av_noinline hl_decode_mb_complex(H264Context *h){
2673     hl_decode_mb_internal(h, 0);
/* Reconstruct the current macroblock, choosing the fast simple path unless
 * the stream/build needs the complex one (interlacing, IPCM, lossless QP0,
 * or size-optimized builds where only one variant exists). */
2676 static void hl_decode_mb(H264Context *h){
2677     MpegEncContext * const s = &h->s;
2678     const int mb_xy= h->mb_xy;
2679     const int mb_type= s->current_picture.mb_type[mb_xy];
2680     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2682     if (is_complex)
2683        hl_decode_mb_complex(h);
2684     else hl_decode_mb_simple(h);
2687 static void pic_as_field(Picture *pic, const int parity){
2688 int i;
2689 for (i = 0; i < 4; ++i) {
2690 if (parity == PICT_BOTTOM_FIELD)
2691 pic->data[i] += pic->linesize[i];
2692 pic->reference = parity;
2693 pic->linesize[i] *= 2;
2695 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2698 static int split_field_copy(Picture *dest, Picture *src,
2699 int parity, int id_add){
2700 int match = !!(src->reference & parity);
2702 if (match) {
2703 *dest = *src;
2704 if(parity != PICT_FRAME){
2705 pic_as_field(dest, parity);
2706 dest->pic_id *= 2;
2707 dest->pic_id += id_add;
2711 return match;
2714 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2715 int i[2]={0};
2716 int index=0;
2718 while(i[0]<len || i[1]<len){
2719 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2720 i[0]++;
2721 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2722 i[1]++;
2723 if(i[0] < len){
2724 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2725 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2727 if(i[1] < len){
2728 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2729 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2733 return index;
2736 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2737 int i, best_poc;
2738 int out_i= 0;
2740 for(;;){
2741 best_poc= dir ? INT_MIN : INT_MAX;
2743 for(i=0; i<len; i++){
2744 const int poc= src[i]->poc;
2745 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2746 best_poc= poc;
2747 sorted[out_i]= src[i];
2750 if(best_poc == (dir ? INT_MIN : INT_MAX))
2751 break;
2752 limit= sorted[out_i++]->poc - dir;
2754 return out_i;
2758 * fills the default_ref_list.
2760 static int fill_default_ref_list(H264Context *h){
2761 MpegEncContext * const s = &h->s;
2762 int i, len;
2764 if(h->slice_type_nos==FF_B_TYPE){
2765 Picture *sorted[32];
2766 int cur_poc, list;
2767 int lens[2];
2769 if(FIELD_PICTURE)
2770 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2771 else
2772 cur_poc= s->current_picture_ptr->poc;
2774 for(list= 0; list<2; list++){
2775 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2776 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2777 assert(len<=32);
2778 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2779 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2780 assert(len<=32);
2782 if(len < h->ref_count[list])
2783 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2784 lens[list]= len;
2787 if(lens[0] == lens[1] && lens[1] > 1){
2788 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2789 if(i == lens[0])
2790 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2792 }else{
2793 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2794 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2795 assert(len <= 32);
2796 if(len < h->ref_count[0])
2797 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2799 #ifdef TRACE
2800 for (i=0; i<h->ref_count[0]; i++) {
2801 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2803 if(h->slice_type_nos==FF_B_TYPE){
2804 for (i=0; i<h->ref_count[1]; i++) {
2805 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2808 #endif
2809 return 0;
/* Debug helpers (defined further down); forward-declared for use in
 * decode_ref_pic_list_reordering(). */
static void print_short_term(H264Context *h);
static void print_long_term(H264Context *h);
2816 * Extract structure information about the picture described by pic_num in
2817 * the current decoding context (frame or field). Note that pic_num is
2818 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2819 * @param pic_num picture number for which to extract structure information
2820 * @param structure one of PICT_XXX describing structure of picture
2821 * with pic_num
2822 * @return frame number (short term) or long term index of picture
2823 * described by pic_num
2825 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2826 MpegEncContext * const s = &h->s;
2828 *structure = s->picture_structure;
2829 if(FIELD_PICTURE){
2830 if (!(pic_num & 1))
2831 /* opposite field */
2832 *structure ^= PICT_FRAME;
2833 pic_num >>= 1;
2836 return pic_num;
/**
 * Parse the ref_pic_list_reordering() slice-header syntax and apply the
 * reordering commands to h->ref_list, starting from the default lists.
 * @return 0 on success, -1 on invalid bitstream data
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, index, pic_structure;

    print_short_term(h);
    print_long_term(h);

    for(list=0; list<h->list_count; list++){
        /* start from the default list; commands move entries in place */
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){ // ref_pic_list_reordering_flag
            int pred= h->curr_pic_num; // running picture-number predictor

            for(index=0; ; index++){
                unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
                unsigned int pic_id;
                int i;
                Picture *ref = NULL;

                if(reordering_of_pic_nums_idc==3) // idc 3 terminates the command list
                    break;

                if(index >= h->ref_count[list]){
                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                    return -1;

                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        /* idc 0/1: short-term ref addressed by a signed
                         * difference from the predicted picture number */
                        const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
                        int frame_num;

                        if(abs_diff_pic_num > h->max_pic_num){
                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                            return -1;

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else                                pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1; // modular wrap of the picture number

                        frame_num = pic_num_extract(h, pred, &pic_structure);

                        /* search short-term list newest-first for that frame_num */
                        for(i= h->short_ref_count-1; i>=0; i--){
                            ref = h->short_ref[i];
                            assert(ref->reference);
                            assert(!ref->long_ref);
                               ref->frame_num == frame_num &&
                               (ref->reference & pic_structure)
                                break;
                        if(i>=0)
                            ref->pic_id= pred;
                    }else{
                        /* idc 2: long-term ref addressed by its index */
                        int long_idx;
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        long_idx= pic_num_extract(h, pic_id, &pic_structure);

                        if(long_idx>31){
                            av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
                            return -1;
                        ref = h->long_ref[long_idx];
                        assert(!(ref && !ref->reference));
                        if(ref && (ref->reference & pic_structure)){
                            ref->pic_id= pic_id;
                            assert(ref->long_ref);
                            i=0; // found
                        }else{
                            i=-1; // not found / wrong parity

                    if (i < 0) {
                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    } else {
                        /* shift entries [index, i) down one slot, then insert
                         * the addressed picture at position index */
                        for(i=index; i+1<h->ref_count[list]; i++){
                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
                                break;
                        for(; i > index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        h->ref_list[list][index]= *ref;
                        if (FIELD_PICTURE){
                            pic_as_field(&h->ref_list[list][index], pic_structure);
                }else{
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
                    return -1;

    /* plug any holes left in the lists so later code never sees a NULL
     * reference frame */
    for(list=0; list<h->list_count; list++){
        for(index= 0; index < h->ref_count[list]; index++){
            if(!h->ref_list[list][index].data[0]){
                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution

    return 0;
/**
 * For MBAFF decoding, derive per-field reference entries from each frame
 * reference: slots [16+2*i] / [16+2*i+1] hold the top/bottom field views of
 * ref_list entry i, and the prediction weights are replicated accordingly.
 */
static void fill_mbaff_ref_list(H264Context *h){
    int list, i, j;
    for(list=0; list<2; list++){ //FIXME try list_count
        for(i=0; i<h->ref_count[list]; i++){
            Picture *frame = &h->ref_list[list][i];
            Picture *field = &h->ref_list[list][16+2*i];
            field[0] = *frame;
            /* field view: doubled stride skips the other field's lines */
            for(j=0; j<3; j++)
                field[0].linesize[j] <<= 1;
            field[0].reference = PICT_TOP_FIELD;
            field[0].poc= field[0].field_poc[0];
            field[1] = field[0];
            /* bottom field starts one (frame) line down */
            for(j=0; j<3; j++)
                field[1].data[j] += frame->linesize[j];
            field[1].reference = PICT_BOTTOM_FIELD;
            field[1].poc= field[1].field_poc[1];

            /* both fields inherit the frame's explicit weights/offsets */
            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
            for(j=0; j<2; j++){
                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];

    /* implicit weights are indexed [list1][list0]; replicate both axes */
    for(j=0; j<h->ref_count[1]; j++){
        for(i=0; i<h->ref_count[0]; i++)
            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
        memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the pred_weight_table() slice-header syntax (explicit weighted
 * prediction). Fills the per-list luma/chroma weights and offsets and sets
 * h->use_weight / h->use_weight_chroma when any weight is non-default.
 * @return 0
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* default weight = 1.0 in fixed point, i.e. 1 << denom */
    luma_def = 1<<h->luma_log2_weight_denom;
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list] = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
                /* only flag weighting when it actually differs from identity */
                if(   h->luma_weight[list][i] != luma_def
                   || h->luma_offset[list][i] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
            }else{
                h->luma_weight[list][i]= luma_def;
                h->luma_offset[list][i]= 0;

            if(CHROMA){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    for(j=0; j<2; j++){ // j: Cb/Cr
                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[list][i][j] != chroma_def
                           || h->chroma_offset[list][i][j] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[list][i][j]= chroma_def;
                        h->chroma_offset[list][i][j]= 0;

        /* P slices carry a table for list 0 only */
        if(h->slice_type_nos != FF_B_TYPE) break;

    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
/**
 * Compute the implicit weighted-prediction table for B slices: for each
 * (ref0, ref1) pair the weight is derived from the POC distances of the two
 * references to the current picture (H.264 8.4.2.3.2-style derivation).
 * Falls back to unweighted (32/32) when the scale factor is out of range.
 */
static void implicit_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i;
    int cur_poc = s->current_picture_ptr->poc;

    /* single pair of refs equidistant from the current picture: implicit
     * weighting degenerates to plain averaging, so disable it */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;

    h->use_weight= 2; // 2 == implicit mode
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;
    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i] = 0;
        h->chroma_weight_flag[i] = 0;

    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
            int poc1 = h->ref_list[1][ref1].poc;
            int td = av_clip(poc1 - poc0, -128, 127); // clipped temporal distance
            if(td){
                int tb = av_clip(cur_poc - poc0, -128, 127);
                int tx = (16384 + (FFABS(td) >> 1)) / td;
                int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
                if(dist_scale_factor < -64 || dist_scale_factor > 128)
                    h->implicit_weight[ref0][ref1] = 32;
                else
                    h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
            }else
                h->implicit_weight[ref0][ref1] = 32;
3084 * Mark a picture as no longer needed for reference. The refmask
3085 * argument allows unreferencing of individual fields or the whole frame.
3086 * If the picture becomes entirely unreferenced, but is being held for
3087 * display purposes, it is marked as such.
3088 * @param refmask mask of fields to unreference; the mask is bitwise
3089 * anded with the reference marking of pic
3090 * @return non-zero if pic becomes entirely unreferenced (except possibly
3091 * for display purposes) zero if one of the fields remains in
3092 * reference
3094 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3095 int i;
3096 if (pic->reference &= refmask) {
3097 return 0;
3098 } else {
3099 for(i = 0; h->delayed_pic[i]; i++)
3100 if(pic == h->delayed_pic[i]){
3101 pic->reference=DELAYED_PIC_REF;
3102 break;
3104 return 1;
3109 * instantaneous decoder refresh.
3111 static void idr(H264Context *h){
3112 int i;
3114 for(i=0; i<16; i++){
3115 remove_long(h, i, 0);
3117 assert(h->long_ref_count==0);
3119 for(i=0; i<h->short_ref_count; i++){
3120 unreference_pic(h, h->short_ref[i], 0);
3121 h->short_ref[i]= NULL;
3123 h->short_ref_count=0;
3124 h->prev_frame_num= 0;
3125 h->prev_frame_num_offset= 0;
3126 h->prev_poc_msb=
3127 h->prev_poc_lsb= 0;
3130 /* forget old pics after a seek */
3131 static void flush_dpb(AVCodecContext *avctx){
3132 H264Context *h= avctx->priv_data;
3133 int i;
3134 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3135 if(h->delayed_pic[i])
3136 h->delayed_pic[i]->reference= 0;
3137 h->delayed_pic[i]= NULL;
3139 h->outputed_poc= INT_MIN;
3140 idr(h);
3141 if(h->s.current_picture_ptr)
3142 h->s.current_picture_ptr->reference= 0;
3143 h->s.first_field= 0;
3144 ff_mpeg_flush(avctx);
3148 * Find a Picture in the short term reference list by frame number.
3149 * @param frame_num frame number to search for
3150 * @param idx the index into h->short_ref where returned picture is found
3151 * undefined if no picture found.
3152 * @return pointer to the found picture, or NULL if no pic with the provided
3153 * frame number is found
3155 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3156 MpegEncContext * const s = &h->s;
3157 int i;
3159 for(i=0; i<h->short_ref_count; i++){
3160 Picture *pic= h->short_ref[i];
3161 if(s->avctx->debug&FF_DEBUG_MMCO)
3162 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3163 if(pic->frame_num == frame_num) {
3164 *idx = i;
3165 return pic;
3168 return NULL;
3172 * Remove a picture from the short term reference list by its index in
3173 * that list. This does no checking on the provided index; it is assumed
3174 * to be valid. Other list entries are shifted down.
3175 * @param i index into h->short_ref of picture to remove.
3177 static void remove_short_at_index(H264Context *h, int i){
3178 assert(i >= 0 && i < h->short_ref_count);
3179 h->short_ref[i]= NULL;
3180 if (--h->short_ref_count)
3181 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3186 * @return the removed picture or NULL if an error occurs
3188 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3189 MpegEncContext * const s = &h->s;
3190 Picture *pic;
3191 int i;
3193 if(s->avctx->debug&FF_DEBUG_MMCO)
3194 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3196 pic = find_short(h, frame_num, &i);
3197 if (pic){
3198 if(unreference_pic(h, pic, ref_mask))
3199 remove_short_at_index(h, i);
3202 return pic;
3206 * Remove a picture from the long term reference list by its index in
3207 * that list.
3208 * @return the removed picture or NULL if an error occurs
3210 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3211 Picture *pic;
3213 pic= h->long_ref[i];
3214 if (pic){
3215 if(unreference_pic(h, pic, ref_mask)){
3216 assert(h->long_ref[i]->long_ref == 1);
3217 h->long_ref[i]->long_ref= 0;
3218 h->long_ref[i]= NULL;
3219 h->long_ref_count--;
3223 return pic;
3227 * print short term list
3229 static void print_short_term(H264Context *h) {
3230 uint32_t i;
3231 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3232 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3233 for(i=0; i<h->short_ref_count; i++){
3234 Picture *pic= h->short_ref[i];
3235 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3241 * print long term list
3243 static void print_long_term(H264Context *h) {
3244 uint32_t i;
3245 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3246 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3247 for(i = 0; i < 16; i++){
3248 Picture *pic= h->long_ref[i];
3249 if (pic) {
3250 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
 * Executes the reference picture marking (memory management control operations).
 * Applies the parsed MMCO list to the short/long term reference arrays; when
 * no MMCO assigned the current picture, performs the default (sliding-window
 * style) insertion into short_ref. Also enforces the DPB reference-count
 * limit against corrupted streams.
 * @return 0 (errors are logged and recovered from, not propagated)
 */
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
    MpegEncContext * const s = &h->s;
    int i, j;
    int current_ref_assigned=0; // set once an MMCO has marked the current picture
    Picture *pic;

    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");

    for(i=0; i<mmco_count; i++){
        int structure, frame_num;
        if(s->avctx->debug&FF_DEBUG_MMCO)
            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);

        /* both of these opcodes address a short-term picture; locate it first */
        if(   mmco[i].opcode == MMCO_SHORT2UNUSED
           || mmco[i].opcode == MMCO_SHORT2LONG){
            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
            pic = find_short(h, frame_num, &j);
            if(!pic){
                if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
                   || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
                    av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
                continue;

        switch(mmco[i].opcode){
        case MMCO_SHORT2UNUSED:
            if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
            /* unreference only the addressed field (or frame) */
            remove_short(h, frame_num, structure ^ PICT_FRAME);
            break;
        case MMCO_SHORT2LONG:
            /* move the found short-term pic to the given long-term slot */
            if (h->long_ref[mmco[i].long_arg] != pic)
                remove_long(h, mmco[i].long_arg, 0);

            remove_short_at_index(h, j);
            h->long_ref[ mmco[i].long_arg ]= pic;
            if (h->long_ref[ mmco[i].long_arg ]){
                h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                h->long_ref_count++;
            break;
        case MMCO_LONG2UNUSED:
            j = pic_num_extract(h, mmco[i].long_arg, &structure);
            pic = h->long_ref[j];
            if (pic) {
                remove_long(h, j, structure ^ PICT_FRAME);
            } else if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
            break;
        case MMCO_LONG:
                    // Comment below left from previous code as it is an interesting note.
                    /* First field in pair is in short term list or
                     * at a different long term index.
                     * This is not allowed; see 7.4.3.3, notes 2 and 3.
                     * Report the problem and keep the pair where it is,
                     * and mark this field valid.
                     */

            /* mark the current picture long-term at the given index */
            if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
                remove_long(h, mmco[i].long_arg, 0);

                h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
                h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                h->long_ref_count++;

            s->current_picture_ptr->reference |= s->picture_structure;
            current_ref_assigned=1;
            break;
        case MMCO_SET_MAX_LONG:
            assert(mmco[i].long_arg <= 16);
            // just remove the long term which index is greater than new max
            for(j = mmco[i].long_arg; j<16; j++){
                remove_long(h, j, 0);
            break;
        case MMCO_RESET:
            /* full reset: drop all references and zero the POC/frame state */
            while(h->short_ref_count){
                remove_short(h, h->short_ref[0]->frame_num, 0);
            for(j = 0; j < 16; j++) {
                remove_long(h, j, 0);
            s->current_picture_ptr->poc=
            s->current_picture_ptr->field_poc[0]=
            s->current_picture_ptr->field_poc[1]=
            h->poc_lsb=
            h->poc_msb=
            h->frame_num=
            s->current_picture_ptr->frame_num= 0;
            break;
        default: assert(0);

    if (!current_ref_assigned) {
        /* Second field of complementary field pair; the first field of
         * which is already referenced. If short referenced, it
         * should be first entry in short_ref. If not, it must exist
         * in long_ref; trying to put it on the short list here is an
         * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
         */
        if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
            /* Just mark the second field valid */
            s->current_picture_ptr->reference = PICT_FRAME;
        } else if (s->current_picture_ptr->long_ref) {
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
                                             "assignment for second field "
                                             "in complementary field pair "
                                             "(first field is long term)\n");
        } else {
            pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
            if(pic){
                av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");

            /* default marking: prepend the current picture to short_ref */
            if(h->short_ref_count)
                memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));

            h->short_ref[0]= s->current_picture_ptr;
            h->short_ref_count++;
            s->current_picture_ptr->reference |= s->picture_structure;

    if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){

        /* We have too many reference frames, probably due to corrupted
         * stream. Need to discard one frame. Prevents overrun of the
         * short_ref and long_ref buffers.
         */
        av_log(h->s.avctx, AV_LOG_ERROR,
               "number of reference frames exceeds max (probably "
               "corrupt input), discarding one\n");

        if (h->long_ref_count && !h->short_ref_count) {
            for (i = 0; i < 16; ++i)
                if (h->long_ref[i])
                    break;

            assert(i < 16);
            remove_long(h, i, 0);
        } else {
            /* drop the oldest short-term reference */
            pic = h->short_ref[h->short_ref_count - 1];
            remove_short(h, pic->frame_num, 0);

    print_short_term(h);
    print_long_term(h);
    return 0;
/**
 * Parse the dec_ref_pic_marking() slice-header syntax into h->mmco[] /
 * h->mmco_index. For IDR slices this reads no_output_of_prior_pics /
 * long_term_reference flags; otherwise it reads the adaptive MMCO list, or
 * synthesizes a sliding-window SHORT2UNUSED command when the DPB is full.
 * @return 0 on success, -1 on invalid bitstream data
 */
static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
    MpegEncContext * const s = &h->s;
    int i;

    h->mmco_index= 0;
    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        s->broken_link= get_bits1(gb) -1;
        if(get_bits1(gb)){ // long_term_reference_flag: keep IDR pic long-term
            h->mmco[0].opcode= MMCO_LONG;
            h->mmco[0].long_arg= 0;
            h->mmco_index= 1;
    }else{
        if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
            for(i= 0; i<MAX_MMCO_COUNT; i++) {
                MMCOOpcode opcode= get_ue_golomb_31(gb);

                h->mmco[i].opcode= opcode;
                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    /* difference_of_pic_nums_minus1, wrapped to picture numbering */
                    h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
/*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
                        av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
                        return -1;
*/
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    unsigned int long_arg= get_ue_golomb_31(gb);
                    /* LONG2UNUSED in a field picture may address 2*16 field slots */
                    if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
                        return -1;
                    h->mmco[i].long_arg= long_arg;

                if(opcode > (unsigned)MMCO_LONG){
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                    return -1;
                if(opcode == MMCO_END)
                    break;
            h->mmco_index= i;
        }else{
            /* sliding-window marking */
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
                    !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
                /* DPB full: drop the oldest short-term reference */
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                h->mmco_index= 1;
                if (FIELD_PICTURE) {
                    /* in field mode drop both fields of that frame */
                    h->mmco[0].short_pic_num *= 2;
                    h->mmco[1].opcode= MMCO_SHORT2UNUSED;
                    h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
                    h->mmco_index= 2;

    return 0;
/**
 * Compute the picture order count (POC) of the current picture from the
 * slice-header / SPS state, for all three POC types of H.264 clause 8.2.1:
 * type 0 (poc_lsb/msb), type 1 (cycle-based) and type 2 (frame_num-based).
 * Stores the per-field POCs and the frame POC on the current picture.
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* unwrap frame_num into a monotonically increasing offset */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* detect poc_lsb wrap-around relative to the previous picture */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* non-reference pictures are not counted in the cycle */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type 2: POC derived directly from decoding order */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;

    /* only store the POC of fields actually present in this picture */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
/**
 * Initialize the per-context scan tables. When a non-default (optimized)
 * IDCT is in use, the scan orders are permuted with T() so that coefficient
 * positions match that IDCT's expected element order; the _q0 variants are
 * the unpermuted tables used for qscale 0 with transform bypass.
 */
static void init_scan_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
        memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
        memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
    }else{
        for(i=0; i<16; i++){
            /* swap 4x4 row/column nibbles for the transposed IDCT layout */
#define T(x) (x>>2) | ((x<<2) & 0xF)
            h->zigzag_scan[i] = T(zigzag_scan[i]);
            h-> field_scan[i] = T( field_scan[i]);
#undef T
    if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
        memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
        memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
        memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
        memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
    }else{
        for(i=0; i<64; i++){
            /* swap 8x8 row/column 3-bit fields for the transposed layout */
#define T(x) (x>>3) | ((x&7)<<3)
            h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
            h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
            h->field_scan8x8[i]        = T(field_scan8x8[i]);
            h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
#undef T
    if(h->sps.transform_bypass){ //FIXME same ugly
        /* bypass blocks must use the spec's raster scan orders, unpermuted */
        h->zigzag_scan_q0          = zigzag_scan;
        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
        h->field_scan_q0           = field_scan;
        h->field_scan8x8_q0        = field_scan8x8;
        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
    }else{
        h->zigzag_scan_q0          = h->zigzag_scan;
        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
        h->field_scan_q0           = h->field_scan;
        h->field_scan8x8_q0        = h->field_scan8x8;
        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
/**
 * Replicates H264 "master" context state to a thread (slice) context so a
 * worker can decode its slices against the same current picture and
 * reference lists. Copies only per-picture state; static tables are shared
 * via clone_tables() elsewhere.
 */
static void clone_slice(H264Context *dst, H264Context *src)
{
    memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
    dst->s.current_picture      = src->s.current_picture;
    dst->s.linesize             = src->s.linesize;
    dst->s.uvlinesize           = src->s.uvlinesize;
    dst->s.first_field          = src->s.first_field;

    /* POC / frame-number prediction state */
    dst->prev_poc_msb           = src->prev_poc_msb;
    dst->prev_poc_lsb           = src->prev_poc_lsb;
    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
    dst->prev_frame_num         = src->prev_frame_num;
    dst->short_ref_count        = src->short_ref_count;

    /* reference picture bookkeeping */
    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));

    memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
    memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3632 * decodes a slice header.
3633 * This will also call MPV_common_init() and frame_start() as needed.
3635 * @param h h264context
3636 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3638 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3641 static int decode_slice_header(H264Context *h, H264Context *h0){
3642 MpegEncContext * const s = &h->s;
3643 MpegEncContext * const s0 = &h0->s;
3644 unsigned int first_mb_in_slice;
3645 unsigned int pps_id;
3646 int num_ref_idx_active_override_flag;
3647 unsigned int slice_type, tmp, i, j;
3648 int default_ref_list_done = 0;
3649 int last_pic_structure;
// A slice with nal_ref_idc == 0 is never used as a reference, so it can be dropped.
3650 s->dropable= h->nal_ref_idc == 0;
// With CODEC_FLAG2_FAST, non-reference frames use the cheaper 2-tap qpel filters.
3652 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3653 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3654 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3655 }else{
3656 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3657 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3660 first_mb_in_slice= get_ue_golomb(&s->gb);
// In chunked mode, a slice starting at MB 0 marks the start of a new picture.
3662 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3663 h0->current_slice = 0;
3664 if (!s0->first_field)
3665 s->current_picture_ptr= NULL;
// Parse slice_type (values 5..9 mean "fixed for the whole picture", see below).
3668 slice_type= get_ue_golomb_31(&s->gb);
3669 if(slice_type > 9){
3670 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3671 return -1;
3673 if(slice_type > 4){
3674 slice_type -= 5;
3675 h->slice_type_fixed=1;
3676 }else
3677 h->slice_type_fixed=0;
3679 slice_type= golomb_to_pict_type[ slice_type ];
// I slices (and repeats of the previous slice type) can reuse the default reference list.
3680 if (slice_type == FF_I_TYPE
3681 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3682 default_ref_list_done = 1;
3684 h->slice_type= slice_type;
3685 h->slice_type_nos= slice_type & 3;
3687 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3688 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3689 av_log(h->s.avctx, AV_LOG_ERROR,
3690 "B picture before any references, skipping\n");
3691 return -1;
// Look up the PPS/SPS referenced by this slice; both must have been seen already.
3694 pps_id= get_ue_golomb(&s->gb);
3695 if(pps_id>=MAX_PPS_COUNT){
3696 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3697 return -1;
3699 if(!h0->pps_buffers[pps_id]) {
3700 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3701 return -1;
3703 h->pps= *h0->pps_buffers[pps_id];
3705 if(!h0->sps_buffers[h->pps.sps_id]) {
3706 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3707 return -1;
3709 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS; rebuild them on the master context when it changes.
3711 if(h == h0 && h->dequant_coeff_pps != pps_id){
3712 h->dequant_coeff_pps = pps_id;
3713 init_dequant_tables(h);
// Derive picture dimensions from the SPS (field coding doubles mb_height).
3716 s->mb_width= h->sps.mb_width;
3717 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3719 h->b_stride= s->mb_width*4;
3720 h->b8_stride= s->mb_width*2;
3722 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3723 if(h->sps.frame_mbs_only_flag)
3724 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3725 else
3726 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
// A mid-stream resolution change forces a full context teardown and re-init.
3728 if (s->context_initialized
3729 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3730 if(h != h0)
3731 return -1; // width / height changed during parallelized decoding
3732 free_tables(h);
3733 flush_dpb(s->avctx);
3734 MPV_common_end(s);
3736 if (!s->context_initialized) {
3737 if(h != h0)
3738 return -1; // we cant (re-)initialize context during parallel decoding
3739 if (MPV_common_init(s) < 0)
3740 return -1;
3741 s->first_field = 0;
3743 init_scan_tables(h);
3744 alloc_tables(h);
// Build one H264Context per slice-decoding thread, sharing the big tables of h.
3746 for(i = 1; i < s->avctx->thread_count; i++) {
3747 H264Context *c;
3748 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3749 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3750 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3751 c->sps = h->sps;
3752 c->pps = h->pps;
3753 init_scan_tables(c);
3754 clone_tables(c, h);
3757 for(i = 0; i < s->avctx->thread_count; i++)
3758 if(context_init(h->thread_context[i]) < 0)
3759 return -1;
// Propagate geometry, SAR and timing info to the AVCodecContext.
3761 s->avctx->width = s->width;
3762 s->avctx->height = s->height;
3763 s->avctx->sample_aspect_ratio= h->sps.sar;
3764 if(!s->avctx->sample_aspect_ratio.den)
3765 s->avctx->sample_aspect_ratio.den = 1;
3767 if(h->sps.timing_info_present_flag){
3768 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Old x264 builds (< 44) wrote a time_scale that is off by a factor of two.
3769 if(h->x264_build > 0 && h->x264_build < 44)
3770 s->avctx->time_base.den *= 2;
3771 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3772 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3776 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Determine the picture structure: frame, MBAFF frame, or a single field.
3778 h->mb_mbaff = 0;
3779 h->mb_aff_frame = 0;
3780 last_pic_structure = s0->picture_structure;
3781 if(h->sps.frame_mbs_only_flag){
3782 s->picture_structure= PICT_FRAME;
3783 }else{
3784 if(get_bits1(&s->gb)) { //field_pic_flag
3785 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3786 } else {
3787 s->picture_structure= PICT_FRAME;
3788 h->mb_aff_frame = h->sps.mb_aff;
3791 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3793 if(h0->current_slice == 0){
// Fill in missing frames for gaps in frame_num so the reference lists stay sane.
3794 while(h->frame_num != h->prev_frame_num &&
3795 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3796 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3797 frame_start(h);
3798 h->prev_frame_num++;
3799 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3800 s->current_picture_ptr->frame_num= h->prev_frame_num;
3801 execute_ref_pic_marking(h, NULL, 0);
3804 /* See if we have a decoded first field looking for a pair... */
3805 if (s0->first_field) {
3806 assert(s0->current_picture_ptr);
3807 assert(s0->current_picture_ptr->data[0]);
3808 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3810 /* figure out if we have a complementary field pair */
3811 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3813 * Previous field is unmatched. Don't display it, but let it
3814 * remain for reference if marked as such.
3816 s0->current_picture_ptr = NULL;
3817 s0->first_field = FIELD_PICTURE;
3819 } else {
3820 if (h->nal_ref_idc &&
3821 s0->current_picture_ptr->reference &&
3822 s0->current_picture_ptr->frame_num != h->frame_num) {
3824 * This and previous field were reference, but had
3825 * different frame_nums. Consider this field first in
3826 * pair. Throw away previous field except for reference
3827 * purposes.
3829 s0->first_field = 1;
3830 s0->current_picture_ptr = NULL;
3832 } else {
3833 /* Second field in complementary pair */
3834 s0->first_field = 0;
3838 } else {
3839 /* Frame or first field in a potentially complementary pair */
3840 assert(!s0->current_picture_ptr);
3841 s0->first_field = FIELD_PICTURE;
// Start a new frame unless this is the second field of an already-started frame.
3844 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3845 s0->first_field = 0;
3846 return -1;
3849 if(h != h0)
3850 clone_slice(h, h0);
3852 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3854 assert(s->mb_num == s->mb_width * s->mb_height);
3855 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3856 first_mb_in_slice >= s->mb_num){
3857 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3858 return -1;
// Convert the MB address into x/y resync coordinates for this slice.
3860 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3861 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3862 if (s->picture_structure == PICT_BOTTOM_FIELD)
3863 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3864 assert(s->mb_y < s->mb_height);
// Picture numbering differs for fields: pic_num counts fields, not frames.
3866 if(s->picture_structure==PICT_FRAME){
3867 h->curr_pic_num= h->frame_num;
3868 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3869 }else{
3870 h->curr_pic_num= 2*h->frame_num + 1;
3871 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3874 if(h->nal_unit_type == NAL_IDR_SLICE){
3875 get_ue_golomb(&s->gb); /* idr_pic_id */
// Parse the POC-related syntax elements for the active poc_type.
3878 if(h->sps.poc_type==0){
3879 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3881 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3882 h->delta_poc_bottom= get_se_golomb(&s->gb);
3886 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3887 h->delta_poc[0]= get_se_golomb(&s->gb);
3889 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3890 h->delta_poc[1]= get_se_golomb(&s->gb);
3893 init_poc(h);
3895 if(h->pps.redundant_pic_cnt_present){
3896 h->redundant_pic_count= get_ue_golomb(&s->gb);
3899 //set defaults, might be overridden a few lines later
3900 h->ref_count[0]= h->pps.ref_count[0];
3901 h->ref_count[1]= h->pps.ref_count[1];
3903 if(h->slice_type_nos != FF_I_TYPE){
3904 if(h->slice_type_nos == FF_B_TYPE){
3905 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3907 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3909 if(num_ref_idx_active_override_flag){
3910 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3911 if(h->slice_type_nos==FF_B_TYPE)
3912 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Sanity-check ref counts against the size of the ref_list arrays.
3914 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3915 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3916 h->ref_count[0]= h->ref_count[1]= 1;
3917 return -1;
3920 if(h->slice_type_nos == FF_B_TYPE)
3921 h->list_count= 2;
3922 else
3923 h->list_count= 1;
3924 }else
3925 h->list_count= 0;
3927 if(!default_ref_list_done){
3928 fill_default_ref_list(h);
3931 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3932 return -1;
// Mirror ref_list[0][0]/[1][0] into the MpegEncContext last/next pointers.
3934 if(h->slice_type_nos!=FF_I_TYPE){
3935 s->last_picture_ptr= &h->ref_list[0][0];
3936 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3938 if(h->slice_type_nos==FF_B_TYPE){
3939 s->next_picture_ptr= &h->ref_list[1][0];
3940 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
// Explicit, implicit, or no weighted prediction depending on PPS flags and slice type.
3943 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3944 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3945 pred_weight_table(h);
3946 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3947 implicit_weight_table(h);
3948 else {
3949 h->use_weight = 0;
3950 for (i = 0; i < 2; i++) {
3951 h->luma_weight_flag[i] = 0;
3952 h->chroma_weight_flag[i] = 0;
3956 if(h->nal_ref_idc)
3957 decode_ref_pic_marking(h0, &s->gb);
3959 if(FRAME_MBAFF)
3960 fill_mbaff_ref_list(h);
3962 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3963 direct_dist_scale_factor(h);
3964 direct_ref_list_init(h);
3966 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3967 tmp = get_ue_golomb_31(&s->gb);
3968 if(tmp > 2){
3969 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3970 return -1;
3972 h->cabac_init_idc= tmp;
// slice_qp_delta: derive and validate the starting QP for this slice.
3975 h->last_qscale_diff = 0;
3976 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3977 if(tmp>51){
3978 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3979 return -1;
3981 s->qscale= tmp;
3982 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3983 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3984 //FIXME qscale / qp ... stuff
3985 if(h->slice_type == FF_SP_TYPE){
3986 get_bits1(&s->gb); /* sp_for_switch_flag */
3988 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3989 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking filter parameters (disable_deblocking_filter_idc and offsets).
3992 h->deblocking_filter = 1;
3993 h->slice_alpha_c0_offset = 0;
3994 h->slice_beta_offset = 0;
3995 if( h->pps.deblocking_filter_parameters_present ) {
3996 tmp= get_ue_golomb_31(&s->gb);
3997 if(tmp > 2){
3998 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3999 return -1;
4001 h->deblocking_filter= tmp;
4002 if(h->deblocking_filter < 2)
4003 h->deblocking_filter^= 1; // 1<->0
4005 if( h->deblocking_filter ) {
4006 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4007 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// Honor the user's skip_loop_filter discard level.
4011 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4012 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4013 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4014 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4015 h->deblocking_filter= 0;
// Filtering across slice boundaries prevents parallel slice decoding.
4017 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4018 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4019 /* Cheat slightly for speed:
4020 Do not bother to deblock across slices. */
4021 h->deblocking_filter = 2;
4022 } else {
4023 h0->max_contexts = 1;
4024 if(!h0->single_decode_warning) {
4025 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4026 h0->single_decode_warning = 1;
4028 if(h != h0)
4029 return 1; // deblocking switched inside frame
4033 #if 0 //FMO
4034 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4035 slice_group_change_cycle= get_bits(&s->gb, ?);
4036 #endif
4038 h0->last_slice_type = slice_type;
4039 h->slice_num = ++h0->current_slice;
4040 if(h->slice_num >= MAX_SLICES){
4041 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// Build the ref-index -> frame-number map used by the deblocking filter.
4044 for(j=0; j<2; j++){
4045 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4046 ref2frm[0]=
4047 ref2frm[1]= -1;
4048 for(i=0; i<16; i++)
4049 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4050 +(h->ref_list[j][i].reference&3);
4051 ref2frm[18+0]=
4052 ref2frm[18+1]= -1;
4053 for(i=16; i<48; i++)
4054 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4055 +(h->ref_list[j][i].reference&3);
4058 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4059 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4061 s->avctx->refs= h->sps.ref_frame_count;
4063 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4064 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4065 h->slice_num,
4066 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4067 first_mb_in_slice,
4068 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4069 pps_id, h->frame_num,
4070 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4071 h->ref_count[0], h->ref_count[1],
4072 s->qscale,
4073 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4074 h->use_weight,
4075 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4076 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4080 return 0;
// Reads the CAVLC level_prefix: the number of leading zero bits before the
// first 1 bit (unary code). Returns that count; consumes the zeros plus the
// terminating 1 bit from the bitstream.
4086 static inline int get_level_prefix(GetBitContext *gb){
4087 unsigned int buf;
4088 int log;
4090 OPEN_READER(re, gb);
4091 UPDATE_CACHE(re, gb);
4092 buf=GET_CACHE(re, gb);
// Position of the highest set bit gives the prefix length directly.
4094 log= 32 - av_log2(buf);
4095 #ifdef TRACE
4096 print_bin(buf>>(32-log), log);
4097 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4098 #endif
// Skip the zeros and the stop bit ('log' bits total).
4100 LAST_SKIP_BITS(re, gb, log);
4101 CLOSE_READER(re, gb);
4103 return log-1;
4106 static inline int get_dct8x8_allowed(H264Context *h){
4107 if(h->sps.direct_8x8_inference_flag)
4108 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4109 else
4110 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4114 * decodes a residual block.
4115 * @param n block index
4116 * @param scantable scantable
4117 * @param max_coeff number of coefficients in the block
4118 * @return <0 if an error occurred
4120 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4121 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of 4 coeff_token VLC tables.
4122 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4123 int level[16];
4124 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4126 //FIXME put trailing_onex into the context
// coeff_token encodes total_coeff (bits 2+) and trailing_ones (bits 0-1).
4128 if(n == CHROMA_DC_BLOCK_INDEX){
4129 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4130 total_coeff= coeff_token>>2;
4131 }else{
4132 if(n == LUMA_DC_BLOCK_INDEX){
4133 total_coeff= pred_non_zero_count(h, 0);
4134 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4135 total_coeff= coeff_token>>2;
4136 }else{
4137 total_coeff= pred_non_zero_count(h, n);
4138 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4139 total_coeff= coeff_token>>2;
4140 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4144 //FIXME set last_non_zero?
4146 if(total_coeff==0)
4147 return 0;
4148 if(total_coeff > (unsigned)max_coeff) {
4149 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4150 return -1;
4153 trailing_ones= coeff_token&3;
4154 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4155 assert(total_coeff<=16);
// Trailing ones are coded as sign bits only; peek 3 bits and convert each
// sign bit into +1/-1, then consume only trailing_ones of them.
4157 i = show_bits(gb, 3);
4158 skip_bits(gb, trailing_ones);
4159 level[0] = 1-((i&4)>>1);
4160 level[1] = 1-((i&2) );
4161 level[2] = 1-((i&1)<<1);
// Decode the remaining non-trailing-one levels (prefix/suffix coded).
4163 if(trailing_ones<total_coeff) {
4164 int mask, prefix;
4165 int suffix_length = total_coeff > 10 && trailing_ones < 3;
// Fast path: look up prefix+suffix in a table on LEVEL_TAB_BITS bits;
// values >= 100 signal that only the prefix was resolved.
4166 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4167 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4169 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4170 if(level_code >= 100){
4171 prefix= level_code - 100;
4172 if(prefix == LEVEL_TAB_BITS)
4173 prefix += get_level_prefix(gb);
4175 //first coefficient has suffix_length equal to 0 or 1
4176 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4177 if(suffix_length)
4178 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4179 else
4180 level_code= (prefix<<suffix_length); //part
4181 }else if(prefix==14){
4182 if(suffix_length)
4183 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4184 else
4185 level_code= prefix + get_bits(gb, 4); //part
4186 }else{
4187 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4188 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4189 if(prefix>=16)
4190 level_code += (1<<(prefix-3))-4096;
// If fewer than 3 trailing ones, level magnitudes start at 2, not 1.
4193 if(trailing_ones < 3) level_code += 2;
4195 suffix_length = 2;
// Zig-zag map: even level_code -> positive, odd -> negative.
4196 mask= -(level_code&1);
4197 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4198 }else{
4199 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4201 suffix_length = 1;
4202 if(level_code + 3U > 6U)
4203 suffix_length++;
4204 level[trailing_ones]= level_code;
4207 //remaining coefficients have suffix_length > 0
4208 for(i=trailing_ones+1;i<total_coeff;i++) {
// Thresholds at which suffix_length grows (per the H.264 spec).
4209 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4210 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4211 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4213 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4214 if(level_code >= 100){
4215 prefix= level_code - 100;
4216 if(prefix == LEVEL_TAB_BITS){
4217 prefix += get_level_prefix(gb);
4219 if(prefix<15){
4220 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4221 }else{
4222 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4223 if(prefix>=16)
4224 level_code += (1<<(prefix-3))-4096;
4226 mask= -(level_code&1);
4227 level_code= (((2+level_code)>>1) ^ mask) - mask;
4229 level[i]= level_code;
4231 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4232 suffix_length++;
// Decode total_zeros: the number of zeros interleaved before the last coeff.
4236 if(total_coeff == max_coeff)
4237 zeros_left=0;
4238 else{
4239 if(n == CHROMA_DC_BLOCK_INDEX)
4240 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4241 else
4242 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place levels back-to-front using run_before codes; n > 24 means a DC block
// that is stored raw, otherwise each coeff is dequantized with qmul.
4245 coeff_num = zeros_left + total_coeff - 1;
4246 j = scantable[coeff_num];
4247 if(n > 24){
4248 block[j] = level[0];
4249 for(i=1;i<total_coeff;i++) {
4250 if(zeros_left <= 0)
4251 run_before = 0;
4252 else if(zeros_left < 7){
4253 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4254 }else{
4255 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4257 zeros_left -= run_before;
4258 coeff_num -= 1 + run_before;
4259 j= scantable[ coeff_num ];
4261 block[j]= level[i];
4263 }else{
4264 block[j] = (level[0] * qmul[j] + 32)>>6;
4265 for(i=1;i<total_coeff;i++) {
4266 if(zeros_left <= 0)
4267 run_before = 0;
4268 else if(zeros_left < 7){
4269 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4270 }else{
4271 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4273 zeros_left -= run_before;
4274 coeff_num -= 1 + run_before;
4275 j= scantable[ coeff_num ];
4277 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the run_before codes overran total_zeros.
4281 if(zeros_left<0){
4282 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4283 return -1;
4286 return 0;
4289 static void predict_field_decoding_flag(H264Context *h){
4290 MpegEncContext * const s = &h->s;
4291 const int mb_xy= h->mb_xy;
4292 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4293 ? s->current_picture.mb_type[mb_xy-1]
4294 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4295 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4296 : 0;
4297 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4301 * decodes a P_SKIP or B_SKIP macroblock
// Decodes a P_SKIP or B_SKIP macroblock: no residual, motion inferred
// from neighbours (pskip prediction) or direct mode (B slices).
4303 static void decode_mb_skip(H264Context *h){
4304 MpegEncContext * const s = &h->s;
4305 const int mb_xy= h->mb_xy;
4306 int mb_type=0;
// Skipped MBs have no coefficients at all.
4308 memset(h->non_zero_count[mb_xy], 0, 16);
4309 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4311 if(MB_FIELD)
4312 mb_type|= MB_TYPE_INTERLACED;
4314 if( h->slice_type_nos == FF_B_TYPE )
4316 // just for fill_caches. pred_direct_motion will set the real mb_type
4317 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4319 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4320 pred_direct_motion(h, &mb_type);
4321 mb_type|= MB_TYPE_SKIP;
4323 else
4325 int mx, my;
// P_SKIP: a single 16x16 list-0 partition with ref 0 and the predicted MV.
4326 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4328 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4329 pred_pskip_motion(h, &mx, &my);
4330 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4331 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion, mb_type, qscale and slice ownership for this MB.
4334 write_back_motion(h, mb_type);
4335 s->current_picture.mb_type[mb_xy]= mb_type;
4336 s->current_picture.qscale_table[mb_xy]= s->qscale;
4337 h->slice_table[ mb_xy ]= h->slice_num;
4338 h->prev_mb_skipped= 1;
4342 * decodes a macroblock
4343 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4345 static int decode_mb_cavlc(H264Context *h){
4346 MpegEncContext * const s = &h->s;
4347 int mb_xy;
4348 int partition_count;
4349 unsigned int mb_type, cbp;
4350 int dct8x8_allowed= h->pps.transform_8x8_mode;
4352 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4354 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4355 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4356 down the code */
4357 if(h->slice_type_nos != FF_I_TYPE){
4358 if(s->mb_skip_run==-1)
4359 s->mb_skip_run= get_ue_golomb(&s->gb);
4361 if (s->mb_skip_run--) {
4362 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4363 if(s->mb_skip_run==0)
4364 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4365 else
4366 predict_field_decoding_flag(h);
4368 decode_mb_skip(h);
4369 return 0;
4372 if(FRAME_MBAFF){
4373 if( (s->mb_y&1) == 0 )
4374 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4377 h->prev_mb_skipped= 0;
4379 mb_type= get_ue_golomb(&s->gb);
4380 if(h->slice_type_nos == FF_B_TYPE){
4381 if(mb_type < 23){
4382 partition_count= b_mb_type_info[mb_type].partition_count;
4383 mb_type= b_mb_type_info[mb_type].type;
4384 }else{
4385 mb_type -= 23;
4386 goto decode_intra_mb;
4388 }else if(h->slice_type_nos == FF_P_TYPE){
4389 if(mb_type < 5){
4390 partition_count= p_mb_type_info[mb_type].partition_count;
4391 mb_type= p_mb_type_info[mb_type].type;
4392 }else{
4393 mb_type -= 5;
4394 goto decode_intra_mb;
4396 }else{
4397 assert(h->slice_type_nos == FF_I_TYPE);
4398 if(h->slice_type == FF_SI_TYPE && mb_type)
4399 mb_type--;
4400 decode_intra_mb:
4401 if(mb_type > 25){
4402 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4403 return -1;
4405 partition_count=0;
4406 cbp= i_mb_type_info[mb_type].cbp;
4407 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4408 mb_type= i_mb_type_info[mb_type].type;
4411 if(MB_FIELD)
4412 mb_type |= MB_TYPE_INTERLACED;
4414 h->slice_table[ mb_xy ]= h->slice_num;
4416 if(IS_INTRA_PCM(mb_type)){
4417 unsigned int x;
4419 // We assume these blocks are very rare so we do not optimize it.
4420 align_get_bits(&s->gb);
4422 // The pixels are stored in the same order as levels in h->mb array.
4423 for(x=0; x < (CHROMA ? 384 : 256); x++){
4424 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4427 // In deblocking, the quantizer is 0
4428 s->current_picture.qscale_table[mb_xy]= 0;
4429 // All coeffs are present
4430 memset(h->non_zero_count[mb_xy], 16, 16);
4432 s->current_picture.mb_type[mb_xy]= mb_type;
4433 return 0;
4436 if(MB_MBAFF){
4437 h->ref_count[0] <<= 1;
4438 h->ref_count[1] <<= 1;
4441 fill_caches(h, mb_type, 0);
4443 //mb_pred
4444 if(IS_INTRA(mb_type)){
4445 int pred_mode;
4446 // init_top_left_availability(h);
4447 if(IS_INTRA4x4(mb_type)){
4448 int i;
4449 int di = 1;
4450 if(dct8x8_allowed && get_bits1(&s->gb)){
4451 mb_type |= MB_TYPE_8x8DCT;
4452 di = 4;
4455 // fill_intra4x4_pred_table(h);
4456 for(i=0; i<16; i+=di){
4457 int mode= pred_intra_mode(h, i);
4459 if(!get_bits1(&s->gb)){
4460 const int rem_mode= get_bits(&s->gb, 3);
4461 mode = rem_mode + (rem_mode >= mode);
4464 if(di==4)
4465 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4466 else
4467 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4469 write_back_intra_pred_mode(h);
4470 if( check_intra4x4_pred_mode(h) < 0)
4471 return -1;
4472 }else{
4473 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4474 if(h->intra16x16_pred_mode < 0)
4475 return -1;
4477 if(CHROMA){
4478 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4479 if(pred_mode < 0)
4480 return -1;
4481 h->chroma_pred_mode= pred_mode;
4483 }else if(partition_count==4){
4484 int i, j, sub_partition_count[4], list, ref[2][4];
4486 if(h->slice_type_nos == FF_B_TYPE){
4487 for(i=0; i<4; i++){
4488 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4489 if(h->sub_mb_type[i] >=13){
4490 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4491 return -1;
4493 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4494 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4496 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4497 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4498 pred_direct_motion(h, &mb_type);
4499 h->ref_cache[0][scan8[4]] =
4500 h->ref_cache[1][scan8[4]] =
4501 h->ref_cache[0][scan8[12]] =
4502 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4504 }else{
4505 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4506 for(i=0; i<4; i++){
4507 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4508 if(h->sub_mb_type[i] >=4){
4509 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4510 return -1;
4512 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4513 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4517 for(list=0; list<h->list_count; list++){
4518 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4519 for(i=0; i<4; i++){
4520 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4521 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4522 unsigned int tmp;
4523 if(ref_count == 1){
4524 tmp= 0;
4525 }else if(ref_count == 2){
4526 tmp= get_bits1(&s->gb)^1;
4527 }else{
4528 tmp= get_ue_golomb_31(&s->gb);
4529 if(tmp>=ref_count){
4530 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4531 return -1;
4534 ref[list][i]= tmp;
4535 }else{
4536 //FIXME
4537 ref[list][i] = -1;
4542 if(dct8x8_allowed)
4543 dct8x8_allowed = get_dct8x8_allowed(h);
4545 for(list=0; list<h->list_count; list++){
4546 for(i=0; i<4; i++){
4547 if(IS_DIRECT(h->sub_mb_type[i])) {
4548 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4549 continue;
4551 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4552 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4554 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4555 const int sub_mb_type= h->sub_mb_type[i];
4556 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4557 for(j=0; j<sub_partition_count[i]; j++){
4558 int mx, my;
4559 const int index= 4*i + block_width*j;
4560 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4561 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4562 mx += get_se_golomb(&s->gb);
4563 my += get_se_golomb(&s->gb);
4564 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4566 if(IS_SUB_8X8(sub_mb_type)){
4567 mv_cache[ 1 ][0]=
4568 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4569 mv_cache[ 1 ][1]=
4570 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4571 }else if(IS_SUB_8X4(sub_mb_type)){
4572 mv_cache[ 1 ][0]= mx;
4573 mv_cache[ 1 ][1]= my;
4574 }else if(IS_SUB_4X8(sub_mb_type)){
4575 mv_cache[ 8 ][0]= mx;
4576 mv_cache[ 8 ][1]= my;
4578 mv_cache[ 0 ][0]= mx;
4579 mv_cache[ 0 ][1]= my;
4581 }else{
4582 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4583 p[0] = p[1]=
4584 p[8] = p[9]= 0;
4588 }else if(IS_DIRECT(mb_type)){
4589 pred_direct_motion(h, &mb_type);
4590 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4591 }else{
4592 int list, mx, my, i;
4593 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4594 if(IS_16X16(mb_type)){
4595 for(list=0; list<h->list_count; list++){
4596 unsigned int val;
4597 if(IS_DIR(mb_type, 0, list)){
4598 if(h->ref_count[list]==1){
4599 val= 0;
4600 }else if(h->ref_count[list]==2){
4601 val= get_bits1(&s->gb)^1;
4602 }else{
4603 val= get_ue_golomb_31(&s->gb);
4604 if(val >= h->ref_count[list]){
4605 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4606 return -1;
4609 }else
4610 val= LIST_NOT_USED&0xFF;
4611 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4613 for(list=0; list<h->list_count; list++){
4614 unsigned int val;
4615 if(IS_DIR(mb_type, 0, list)){
4616 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4617 mx += get_se_golomb(&s->gb);
4618 my += get_se_golomb(&s->gb);
4619 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4621 val= pack16to32(mx,my);
4622 }else
4623 val=0;
4624 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4627 else if(IS_16X8(mb_type)){
4628 for(list=0; list<h->list_count; list++){
4629 for(i=0; i<2; i++){
4630 unsigned int val;
4631 if(IS_DIR(mb_type, i, list)){
4632 if(h->ref_count[list] == 1){
4633 val= 0;
4634 }else if(h->ref_count[list] == 2){
4635 val= get_bits1(&s->gb)^1;
4636 }else{
4637 val= get_ue_golomb_31(&s->gb);
4638 if(val >= h->ref_count[list]){
4639 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4640 return -1;
4643 }else
4644 val= LIST_NOT_USED&0xFF;
4645 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4648 for(list=0; list<h->list_count; list++){
4649 for(i=0; i<2; i++){
4650 unsigned int val;
4651 if(IS_DIR(mb_type, i, list)){
4652 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4653 mx += get_se_golomb(&s->gb);
4654 my += get_se_golomb(&s->gb);
4655 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4657 val= pack16to32(mx,my);
4658 }else
4659 val=0;
4660 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4663 }else{
4664 assert(IS_8X16(mb_type));
4665 for(list=0; list<h->list_count; list++){
4666 for(i=0; i<2; i++){
4667 unsigned int val;
4668 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4669 if(h->ref_count[list]==1){
4670 val= 0;
4671 }else if(h->ref_count[list]==2){
4672 val= get_bits1(&s->gb)^1;
4673 }else{
4674 val= get_ue_golomb_31(&s->gb);
4675 if(val >= h->ref_count[list]){
4676 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4677 return -1;
4680 }else
4681 val= LIST_NOT_USED&0xFF;
4682 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4685 for(list=0; list<h->list_count; list++){
4686 for(i=0; i<2; i++){
4687 unsigned int val;
4688 if(IS_DIR(mb_type, i, list)){
4689 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4690 mx += get_se_golomb(&s->gb);
4691 my += get_se_golomb(&s->gb);
4692 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4694 val= pack16to32(mx,my);
4695 }else
4696 val=0;
4697 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4703 if(IS_INTER(mb_type))
4704 write_back_motion(h, mb_type);
4706 if(!IS_INTRA16x16(mb_type)){
4707 cbp= get_ue_golomb(&s->gb);
4708 if(cbp > 47){
4709 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4710 return -1;
4713 if(CHROMA){
4714 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4715 else cbp= golomb_to_inter_cbp [cbp];
4716 }else{
4717 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4718 else cbp= golomb_to_inter_cbp_gray[cbp];
4721 h->cbp = cbp;
4723 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4724 if(get_bits1(&s->gb)){
4725 mb_type |= MB_TYPE_8x8DCT;
4726 h->cbp_table[mb_xy]= cbp;
4729 s->current_picture.mb_type[mb_xy]= mb_type;
4731 if(cbp || IS_INTRA16x16(mb_type)){
4732 int i8x8, i4x4, chroma_idx;
4733 int dquant;
4734 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4735 const uint8_t *scan, *scan8x8, *dc_scan;
4737 // fill_non_zero_count_cache(h);
4739 if(IS_INTERLACED(mb_type)){
4740 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4741 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4742 dc_scan= luma_dc_field_scan;
4743 }else{
4744 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4745 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4746 dc_scan= luma_dc_zigzag_scan;
4749 dquant= get_se_golomb(&s->gb);
4751 if( dquant > 25 || dquant < -26 ){
4752 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4753 return -1;
4756 s->qscale += dquant;
4757 if(((unsigned)s->qscale) > 51){
4758 if(s->qscale<0) s->qscale+= 52;
4759 else s->qscale-= 52;
4762 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4763 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4764 if(IS_INTRA16x16(mb_type)){
4765 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4766 return -1; //FIXME continue if partitioned and other return -1 too
4769 assert((cbp&15) == 0 || (cbp&15) == 15);
4771 if(cbp&15){
4772 for(i8x8=0; i8x8<4; i8x8++){
4773 for(i4x4=0; i4x4<4; i4x4++){
4774 const int index= i4x4 + 4*i8x8;
4775 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4776 return -1;
4780 }else{
4781 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4783 }else{
4784 for(i8x8=0; i8x8<4; i8x8++){
4785 if(cbp & (1<<i8x8)){
4786 if(IS_8x8DCT(mb_type)){
4787 DCTELEM *buf = &h->mb[64*i8x8];
4788 uint8_t *nnz;
4789 for(i4x4=0; i4x4<4; i4x4++){
4790 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4791 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4792 return -1;
4794 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4795 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4796 }else{
4797 for(i4x4=0; i4x4<4; i4x4++){
4798 const int index= i4x4 + 4*i8x8;
4800 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4801 return -1;
4805 }else{
4806 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4807 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4812 if(cbp&0x30){
4813 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4814 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4815 return -1;
4819 if(cbp&0x20){
4820 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4821 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4822 for(i4x4=0; i4x4<4; i4x4++){
4823 const int index= 16 + 4*chroma_idx + i4x4;
4824 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4825 return -1;
4829 }else{
4830 uint8_t * const nnz= &h->non_zero_count_cache[0];
4831 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4832 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4834 }else{
4835 uint8_t * const nnz= &h->non_zero_count_cache[0];
4836 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4837 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4838 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4840 s->current_picture.qscale_table[mb_xy]= s->qscale;
4841 write_back_non_zero_count(h);
4843 if(MB_MBAFF){
4844 h->ref_count[0] >>= 1;
4845 h->ref_count[1] >>= 1;
4848 return 0;
4851 static int decode_cabac_field_decoding_flag(H264Context *h) {
4852 MpegEncContext * const s = &h->s;
4853 const int mb_x = s->mb_x;
4854 const int mb_y = s->mb_y & ~1;
4855 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4856 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4858 unsigned int ctx = 0;
4860 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4861 ctx += 1;
4863 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4864 ctx += 1;
4867 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decode a CABAC intra macroblock type (mb_type bin string for I slices,
 * or the intra suffix inside P/B slices).
 * @param ctx_base    index of the first CABAC state for this binarization
 * @param intra_slice nonzero when decoding inside an I/SI slice (neighbor
 *                    context is then used for the first bin)
 * @return 0 for I_4x4, 1..24 for the I_16x16 variants, 25 for I_PCM
 */
static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
    uint8_t *state= &h->cabac_state[ctx_base];
    int mb_type;

    if(intra_slice){
        MpegEncContext * const s = &h->s;
        const int mba_xy = h->left_mb_xy[0];
        const int mbb_xy = h->top_mb_xy;
        int ctx=0;
        /* first-bin context: count non-I4x4 neighbors in the same slice */
        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
            ctx++;
        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
            ctx++;
        if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
            return 0;   /* I4x4 */
        state += 2; /* suffix states start 2 further in for intra slices */
    }else{
        if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
            return 0;   /* I4x4 */
    }

    /* end-of-slice-style terminate bin distinguishes I_PCM */
    if( get_cabac_terminate( &h->cabac ) )
        return 25;  /* PCM */

    /* remaining bins build the I_16x16 index: cbp_luma, cbp_chroma(2),
     * and the 2-bit intra16x16 prediction mode */
    mb_type = 1; /* I16x16 */
    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
    return mb_type;
}
/**
 * Decode the CABAC mb_type for a B slice.
 * Uses CABAC states 27..32; the first-bin context counts non-direct
 * neighbors in the same slice.
 * @return an index into b_mb_type_info (0..22), or 23+intra_type for
 *         intra macroblocks (caller subtracts 23 and jumps to intra path)
 */
static int decode_cabac_mb_type_b( H264Context *h ) {
    MpegEncContext * const s = &h->s;

    const int mba_xy = h->left_mb_xy[0];
    const int mbb_xy = h->top_mb_xy;
    int ctx = 0;
    int bits;
    assert(h->slice_type_nos == FF_B_TYPE);

    if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
        ctx++;
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
        ctx++;

    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
        return 0; /* B_Direct_16x16 */

    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
        return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
    }

    /* 4-bit suffix selects among the remaining bipred/partitioned types */
    bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
    if( bits < 8 )
        return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
    else if( bits == 13 ) {
        /* intra mb in a B slice: decode the intra suffix, offset by 23 */
        return decode_cabac_intra_mb_type(h, 32, 0) + 23;
    } else if( bits == 14 )
        return 11; /* B_L1_L0_8x16 */
    else if( bits == 15 )
        return 22; /* B_8x8 */

    /* bits 8..12: one more bin disambiguates the mixed-direction types */
    bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
    return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
}
/**
 * Decode the CABAC mb_skip_flag for macroblock (mb_x, mb_y).
 * Context counts left/above non-skipped neighbors in the same slice
 * (states 11..13 for P, 24..26 for B via the +13 offset). Under MBAFF
 * the neighbor addresses are remapped to the matching field/frame MB.
 * @return 1 if the macroblock is skipped
 */
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
    MpegEncContext * const s = &h->s;
    int mba_xy, mbb_xy;
    int ctx = 0;

    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
        mba_xy = mb_xy - 1;
        /* bottom MB of a pair: use the bottom left neighbor when the
         * left pair matches our field/frame coding */
        if( (mb_y&1)
            && h->slice_table[mba_xy] == h->slice_num
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
            mba_xy += s->mb_stride;
        if( MB_FIELD ){
            mbb_xy = mb_xy - s->mb_stride;
            if( !(mb_y&1)
                && h->slice_table[mbb_xy] == h->slice_num
                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
                mbb_xy -= s->mb_stride;
        }else
            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
    }else{
        int mb_xy = h->mb_xy;
        mba_xy = mb_xy - 1;
        /* in field pictures rows are interleaved, so "above" is 2 strides up */
        mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
    }

    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
        ctx++;
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
        ctx++;

    if( h->slice_type_nos == FF_B_TYPE )
        ctx += 13;
    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}
4977 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4978 int mode = 0;
4980 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4981 return pred_mode;
4983 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4984 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4985 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4987 if( mode >= pred_mode )
4988 return mode + 1;
4989 else
4990 return mode;
/**
 * Decode the CABAC intra chroma prediction mode (0..3).
 * First bin uses a neighbor-derived context (states 64..66); the
 * remaining truncated-unary bins share state 67 (64+3).
 */
static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
    const int mba_xy = h->left_mb_xy[0];
    const int mbb_xy = h->top_mb_xy;

    int ctx = 0;

    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
        ctx++;

    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
        ctx++;

    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
        return 0;

    /* truncated unary suffix: 10 -> 1, 110 -> 2, 111 -> 3 */
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
        return 1;
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
        return 2;
    else
        return 3;
}
/**
 * Decode the 4-bit luma coded_block_pattern with CABAC (states 73..76).
 * Each bit's context depends on the corresponding left/top neighbor 8x8
 * block — taken from the neighbor MB's cbp for the first bits, and from
 * already-decoded bits of this MB's own cbp for the later ones, so the
 * decode order of the four bins must not change.
 */
static int decode_cabac_mb_cbp_luma( H264Context *h) {
    int cbp_b, cbp_a, ctx, cbp = 0;

    /* -1 (all bits set) when the neighbor is outside the slice */
    cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
    cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;

    ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
    ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
    ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
    ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
    return cbp;
}
5033 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5034 int ctx;
5035 int cbp_a, cbp_b;
5037 cbp_a = (h->left_cbp>>4)&0x03;
5038 cbp_b = (h-> top_cbp>>4)&0x03;
5040 ctx = 0;
5041 if( cbp_a > 0 ) ctx++;
5042 if( cbp_b > 0 ) ctx += 2;
5043 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5044 return 0;
5046 ctx = 4;
5047 if( cbp_a == 2 ) ctx++;
5048 if( cbp_b == 2 ) ctx += 2;
5049 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5051 static int decode_cabac_mb_dqp( H264Context *h) {
5052 int ctx= h->last_qscale_diff != 0;
5053 int val = 0;
5055 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5056 ctx= 2+(ctx>>1);
5057 val++;
5058 if(val > 102) //prevent infinite loop
5059 return INT_MIN;
5062 if( val&0x01 )
5063 return (val + 1)>>1 ;
5064 else
5065 return -((val + 1)>>1);
5067 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5068 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5069 return 0; /* 8x8 */
5070 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5071 return 1; /* 8x4 */
5072 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5073 return 2; /* 4x8 */
5074 return 3; /* 4x4 */
/**
 * Decode a B-slice sub_mb_type with CABAC (states 36..39).
 * @return an index into b_sub_mb_type_info (0..12); several bins reuse
 *         state 39, so the read order is significant
 */
static int decode_cabac_b_mb_sub_type( H264Context *h ) {
    int type;
    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
        return 0;   /* B_Direct_8x8 */
    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
    type = 3;
    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
        type += 4;
    }
    /* last two bins select within the 4-entry group starting at `type` */
    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
    type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
    return type;
}
/**
 * Decode transform_size_8x8_flag with CABAC; the context (states 399..401)
 * is the number of neighbors already using the 8x8 transform.
 */
static inline int decode_cabac_mb_transform_size( H264Context *h ) {
    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
}
/**
 * Decode a reference index with CABAC (states 54..59).
 * @param list reference list (0 or 1)
 * @param n    scan8-relative block index of the partition's top-left 4x4 block
 * @return the decoded reference index, or -1 if it exceeds the sanity bound
 */
static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
    int refa = h->ref_cache[list][scan8[n] - 1];
    int refb = h->ref_cache[list][scan8[n] - 8];
    int ref  = 0;
    int ctx  = 0;

    if( h->slice_type_nos == FF_B_TYPE) {
        /* direct-predicted neighbors do not count toward the context */
        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
            ctx++;
        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
            ctx += 2;
    } else {
        if( refa > 0 )
            ctx++;
        if( refb > 0 )
            ctx += 2;
    }

    /* unary binarization; after the first bin ctx becomes 4, then 5 */
    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
        ref++;
        ctx = (ctx>>2)+4;
        if(ref >= 32 /*h->ref_list[list]*/){
            return -1;
        }
    }
    return ref;
}
/**
 * Decode one motion vector difference component with CABAC (UEG3
 * binarization; states 40..46 for x, 47..53 for y).
 * @param n scan8-relative block index (for the neighbor mvd cache lookup)
 * @param l component: 0 = x, 1 = y
 * @return the signed mvd component, or INT_MIN on a damaged exp-golomb suffix
 */
static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
    /* context from the magnitude of neighboring mvds */
    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
               abs( h->mvd_cache[list][scan8[n] - 8][l] );
    int ctxbase = (l == 0) ? 40 : 47;
    int mvd;
    int ctx = (amvd>2) + (amvd>32);

    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
        return 0;

    /* truncated unary prefix, up to 9, with ctx saturating at 6 */
    mvd= 1;
    ctx= 3;
    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
        mvd++;
        if( ctx < 6 )
            ctx++;
    }

    /* exp-golomb (k=3) bypass suffix for large magnitudes */
    if( mvd >= 9 ) {
        int k = 3;
        while( get_cabac_bypass( &h->cabac ) ) {
            mvd += 1 << k;
            k++;
            if(k>24){
                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
                return INT_MIN;
            }
        }
        while( k-- ) {
            if( get_cabac_bypass( &h->cabac ) )
                mvd += 1 << k;
        }
    }
    /* bypass-coded sign bit */
    return get_cabac_bypass_sign( &h->cabac, -mvd );
}
5161 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5162 int nza, nzb;
5163 int ctx = 0;
5165 if( is_dc ) {
5166 if( cat == 0 ) {
5167 nza = h->left_cbp&0x100;
5168 nzb = h-> top_cbp&0x100;
5169 } else {
5170 nza = (h->left_cbp>>(6+idx))&0x01;
5171 nzb = (h-> top_cbp>>(6+idx))&0x01;
5173 } else {
5174 assert(cat == 1 || cat == 2 || cat == 4);
5175 nza = h->non_zero_count_cache[scan8[idx] - 1];
5176 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5179 if( nza > 0 )
5180 ctx++;
5182 if( nzb > 0 )
5183 ctx += 2;
5185 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context index used for the
 * last_significant_coeff_flag of 8x8 blocks (shared with the asm path,
 * hence DECLARE_ASM_CONST). */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
/**
 * Decode one residual block with CABAC: coded_block_flag, significance
 * map, and coefficient levels/signs, writing dequantized levels to block[]
 * and updating the non-zero-count / cbp caches.
 *
 * NOTE: the braces around the cat==5 branch are balanced only after
 * preprocessing — the x86 and generic paths each supply a matching
 * `} else {`. Do not "fix" the apparent imbalance.
 *
 * @param block     output coefficient array (in scan order via scantable)
 * @param cat       block category, see the table below
 * @param n         block index (meaning depends on cat, see table)
 * @param scantable zigzag/field scan mapping scan position -> block index
 * @param qmul      dequant table, NULL for DC categories
 * @param max_coeff number of coefficients in this block type
 * @param is_dc     nonzero for DC categories; callers pass a constant so
 *                  the branches fold away when inlined
 */
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    static const int significant_coeff_flag_offset[2][6] = {
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
    };
    static const int last_coeff_flag_offset[2][6] = {
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
    };
    static const int coeff_abs_level_m1_offset[6] = {
        227+0, 227+10, 227+20, 227+30, 227+39, 426
    };
    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
    };
    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
     * map node ctx => cabac ctx for level=1 */
    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
    /* map node ctx => cabac ctx for level>1 */
    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
    static const uint8_t coeff_abs_level_transition[2][8] = {
    /* update node ctx after decoding a level=1 */
        { 1, 2, 3, 3, 4, 5, 6, 7 },
    /* update node ctx after decoding a level>1 */
        { 4, 4, 4, 4, 5, 6, 7, 7 }
    };

    int index[64];

    int av_unused last;
    int coeff_count = 0;
    int node_ctx = 0;

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

#if !ARCH_X86
#define CABAC_ON_STACK
#endif
#ifdef CABAC_ON_STACK
#define CC &cc
    /* work on a stack copy of the CABAC state so the compiler can keep it
     * in registers; synced back to h->cabac before every return */
    CABACContext cc;
    cc.range     = h->cabac.range;
    cc.low       = h->cabac.low;
    cc.bytestream= h->cabac.bytestream;
#else
#define CC &h->cabac
#endif


    /* cat: 0-> DC 16x16  n = 0
     *      1-> AC 16x16  n = luma4x4idx
     *      2-> Luma4x4   n = luma4x4idx
     *      3-> DC Chroma n = iCbCr
     *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
     *      5-> Luma8x8   n = 4 * luma8x8idx */

    /* read coded block flag */
    if( is_dc || cat != 5 ) {
        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
            if( !is_dc )
                h->non_zero_count_cache[scan8[n]] = 0;

#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif
            return;
        }
    }

    significant_coeff_ctx_base = h->cabac_state
        + significant_coeff_flag_offset[MB_FIELD][cat];
    last_coeff_ctx_base = h->cabac_state
        + last_coeff_flag_offset[MB_FIELD][cat];
    abs_level_m1_ctx_base = h->cabac_state
        + coeff_abs_level_m1_offset[cat];

    if( !is_dc && cat == 5 ) {
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( CC, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( CC, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
                } \
            } \
        }\
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        }
        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
    } else {
        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
    }
    assert(coeff_count > 0);

    /* record where coefficients were found: cbp bits for DC blocks,
     * non-zero-count cache otherwise */
    if( is_dc ) {
        if( cat == 0 )
            h->cbp_table[h->mb_xy] |= 0x100;
        else
            h->cbp_table[h->mb_xy] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else {
            assert( cat == 1 || cat == 2 || cat == 4 );
            h->non_zero_count_cache[scan8[n]] = coeff_count;
        }
    }

    /* decode levels in reverse scan order (last significant coeff first) */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( CC, ctx ) == 0 ) {
            /* |level| == 1 */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
            }
        } else {
            /* |level| >= 2: truncated unary up to 15, then exp-golomb bypass */
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
                coeff_abs++;
            }

            if( coeff_abs >= 15 ) {
                int j = 0;
                while( get_cabac_bypass( CC ) ) {
                    j++;
                }

                coeff_abs=1;
                while( j-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
                }
                coeff_abs+= 14;
            }

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
            }
        }
    } while( coeff_count );
#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif

}
#if !CONFIG_SMALL
/* Non-CONFIG_SMALL builds: two specializations with is_dc constant-folded,
 * so the DC/non-DC branches inside the internal function disappear. */
static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
}

static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
}
#endif
/**
 * Dispatch residual decoding: categories 0 and 3 are the DC blocks.
 * CONFIG_SMALL builds call the internal function directly (one copy);
 * otherwise the pre-specialized dc/nondc variants are used.
 */
static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
#if CONFIG_SMALL
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
#else
    if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
    else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
#endif
}
/**
 * Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock,
 * adjusting for MBAFF pair structure and field pictures.
 */
static inline void compute_mb_neighbors(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy = h->mb_xy;
    h->top_mb_xy     = mb_xy - s->mb_stride;
    h->left_mb_xy[0] = mb_xy - 1;
    if(FRAME_MBAFF){
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag = MB_FIELD;
        const int bottom = (s->mb_y & 1);

        /* field MB whose relevant top neighbor is in the pair above */
        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            h->top_mb_xy -= s->mb_stride;
        }
        /* NOTE(review): `!flag == flag` relies on ! normalizing the
         * IS_INTERLACED bit to 0/1 before comparing with MB_FIELD; it
         * selects the top MB of the left pair when the coding modes of
         * the two pairs differ in a specific way — confirm against the
         * MBAFF neighbor derivation before touching this condition. */
        if (!left_mb_field_flag == curr_mb_field_flag) {
            h->left_mb_xy[0] = pair_xy - 1;
        }
    } else if (FIELD_PICTURE) {
        /* rows of the two fields are interleaved: "above" is 2 rows up */
        h->top_mb_xy -= s->mb_stride;
    }
    return;
}
5422 * decodes a macroblock
5423 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5425 static int decode_mb_cabac(H264Context *h) {
5426 MpegEncContext * const s = &h->s;
5427 int mb_xy;
5428 int mb_type, partition_count, cbp = 0;
5429 int dct8x8_allowed= h->pps.transform_8x8_mode;
5431 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5433 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5434 if( h->slice_type_nos != FF_I_TYPE ) {
5435 int skip;
5436 /* a skipped mb needs the aff flag from the following mb */
5437 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5438 predict_field_decoding_flag(h);
5439 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5440 skip = h->next_mb_skipped;
5441 else
5442 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5443 /* read skip flags */
5444 if( skip ) {
5445 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5446 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5447 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5448 if(!h->next_mb_skipped)
5449 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5452 decode_mb_skip(h);
5454 h->cbp_table[mb_xy] = 0;
5455 h->chroma_pred_mode_table[mb_xy] = 0;
5456 h->last_qscale_diff = 0;
5458 return 0;
5462 if(FRAME_MBAFF){
5463 if( (s->mb_y&1) == 0 )
5464 h->mb_mbaff =
5465 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5468 h->prev_mb_skipped = 0;
5470 compute_mb_neighbors(h);
5472 if( h->slice_type_nos == FF_B_TYPE ) {
5473 mb_type = decode_cabac_mb_type_b( h );
5474 if( mb_type < 23 ){
5475 partition_count= b_mb_type_info[mb_type].partition_count;
5476 mb_type= b_mb_type_info[mb_type].type;
5477 }else{
5478 mb_type -= 23;
5479 goto decode_intra_mb;
5481 } else if( h->slice_type_nos == FF_P_TYPE ) {
5482 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5483 /* P-type */
5484 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5485 /* P_L0_D16x16, P_8x8 */
5486 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5487 } else {
5488 /* P_L0_D8x16, P_L0_D16x8 */
5489 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5491 partition_count= p_mb_type_info[mb_type].partition_count;
5492 mb_type= p_mb_type_info[mb_type].type;
5493 } else {
5494 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5495 goto decode_intra_mb;
5497 } else {
5498 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5499 if(h->slice_type == FF_SI_TYPE && mb_type)
5500 mb_type--;
5501 assert(h->slice_type_nos == FF_I_TYPE);
5502 decode_intra_mb:
5503 partition_count = 0;
5504 cbp= i_mb_type_info[mb_type].cbp;
5505 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5506 mb_type= i_mb_type_info[mb_type].type;
5508 if(MB_FIELD)
5509 mb_type |= MB_TYPE_INTERLACED;
5511 h->slice_table[ mb_xy ]= h->slice_num;
5513 if(IS_INTRA_PCM(mb_type)) {
5514 const uint8_t *ptr;
5516 // We assume these blocks are very rare so we do not optimize it.
5517 // FIXME The two following lines get the bitstream position in the cabac
5518 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5519 ptr= h->cabac.bytestream;
5520 if(h->cabac.low&0x1) ptr--;
5521 if(CABAC_BITS==16){
5522 if(h->cabac.low&0x1FF) ptr--;
5525 // The pixels are stored in the same order as levels in h->mb array.
5526 memcpy(h->mb, ptr, 256); ptr+=256;
5527 if(CHROMA){
5528 memcpy(h->mb+128, ptr, 128); ptr+=128;
5531 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5533 // All blocks are present
5534 h->cbp_table[mb_xy] = 0x1ef;
5535 h->chroma_pred_mode_table[mb_xy] = 0;
5536 // In deblocking, the quantizer is 0
5537 s->current_picture.qscale_table[mb_xy]= 0;
5538 // All coeffs are present
5539 memset(h->non_zero_count[mb_xy], 16, 16);
5540 s->current_picture.mb_type[mb_xy]= mb_type;
5541 h->last_qscale_diff = 0;
5542 return 0;
5545 if(MB_MBAFF){
5546 h->ref_count[0] <<= 1;
5547 h->ref_count[1] <<= 1;
5550 fill_caches(h, mb_type, 0);
5552 if( IS_INTRA( mb_type ) ) {
5553 int i, pred_mode;
5554 if( IS_INTRA4x4( mb_type ) ) {
5555 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5556 mb_type |= MB_TYPE_8x8DCT;
5557 for( i = 0; i < 16; i+=4 ) {
5558 int pred = pred_intra_mode( h, i );
5559 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5560 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5562 } else {
5563 for( i = 0; i < 16; i++ ) {
5564 int pred = pred_intra_mode( h, i );
5565 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5567 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5570 write_back_intra_pred_mode(h);
5571 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5572 } else {
5573 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5574 if( h->intra16x16_pred_mode < 0 ) return -1;
5576 if(CHROMA){
5577 h->chroma_pred_mode_table[mb_xy] =
5578 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5580 pred_mode= check_intra_pred_mode( h, pred_mode );
5581 if( pred_mode < 0 ) return -1;
5582 h->chroma_pred_mode= pred_mode;
5584 } else if( partition_count == 4 ) {
5585 int i, j, sub_partition_count[4], list, ref[2][4];
5587 if( h->slice_type_nos == FF_B_TYPE ) {
5588 for( i = 0; i < 4; i++ ) {
5589 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5590 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5591 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5593 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5594 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5595 pred_direct_motion(h, &mb_type);
5596 h->ref_cache[0][scan8[4]] =
5597 h->ref_cache[1][scan8[4]] =
5598 h->ref_cache[0][scan8[12]] =
5599 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5600 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5601 for( i = 0; i < 4; i++ )
5602 if( IS_DIRECT(h->sub_mb_type[i]) )
5603 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5606 } else {
5607 for( i = 0; i < 4; i++ ) {
5608 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5609 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5610 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5614 for( list = 0; list < h->list_count; list++ ) {
5615 for( i = 0; i < 4; i++ ) {
5616 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5617 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5618 if( h->ref_count[list] > 1 ){
5619 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5620 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5621 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5622 return -1;
5624 }else
5625 ref[list][i] = 0;
5626 } else {
5627 ref[list][i] = -1;
5629 h->ref_cache[list][ scan8[4*i]+1 ]=
5630 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5634 if(dct8x8_allowed)
5635 dct8x8_allowed = get_dct8x8_allowed(h);
5637 for(list=0; list<h->list_count; list++){
5638 for(i=0; i<4; i++){
5639 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5640 if(IS_DIRECT(h->sub_mb_type[i])){
5641 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5642 continue;
5645 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5646 const int sub_mb_type= h->sub_mb_type[i];
5647 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5648 for(j=0; j<sub_partition_count[i]; j++){
5649 int mpx, mpy;
5650 int mx, my;
5651 const int index= 4*i + block_width*j;
5652 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5653 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5654 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5656 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5657 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5658 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5660 if(IS_SUB_8X8(sub_mb_type)){
5661 mv_cache[ 1 ][0]=
5662 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5663 mv_cache[ 1 ][1]=
5664 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5666 mvd_cache[ 1 ][0]=
5667 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5668 mvd_cache[ 1 ][1]=
5669 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5670 }else if(IS_SUB_8X4(sub_mb_type)){
5671 mv_cache[ 1 ][0]= mx;
5672 mv_cache[ 1 ][1]= my;
5674 mvd_cache[ 1 ][0]= mx - mpx;
5675 mvd_cache[ 1 ][1]= my - mpy;
5676 }else if(IS_SUB_4X8(sub_mb_type)){
5677 mv_cache[ 8 ][0]= mx;
5678 mv_cache[ 8 ][1]= my;
5680 mvd_cache[ 8 ][0]= mx - mpx;
5681 mvd_cache[ 8 ][1]= my - mpy;
5683 mv_cache[ 0 ][0]= mx;
5684 mv_cache[ 0 ][1]= my;
5686 mvd_cache[ 0 ][0]= mx - mpx;
5687 mvd_cache[ 0 ][1]= my - mpy;
5689 }else{
5690 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5691 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5692 p[0] = p[1] = p[8] = p[9] = 0;
5693 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5697 } else if( IS_DIRECT(mb_type) ) {
5698 pred_direct_motion(h, &mb_type);
5699 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5700 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5701 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5702 } else {
5703 int list, mx, my, i, mpx, mpy;
5704 if(IS_16X16(mb_type)){
5705 for(list=0; list<h->list_count; list++){
5706 if(IS_DIR(mb_type, 0, list)){
5707 int ref;
5708 if(h->ref_count[list] > 1){
5709 ref= decode_cabac_mb_ref(h, list, 0);
5710 if(ref >= (unsigned)h->ref_count[list]){
5711 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5712 return -1;
5714 }else
5715 ref=0;
5716 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5717 }else
5718 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5720 for(list=0; list<h->list_count; list++){
5721 if(IS_DIR(mb_type, 0, list)){
5722 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5724 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5725 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5726 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5728 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5729 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5730 }else
5731 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5734 else if(IS_16X8(mb_type)){
5735 for(list=0; list<h->list_count; list++){
5736 for(i=0; i<2; i++){
5737 if(IS_DIR(mb_type, i, list)){
5738 int ref;
5739 if(h->ref_count[list] > 1){
5740 ref= decode_cabac_mb_ref( h, list, 8*i );
5741 if(ref >= (unsigned)h->ref_count[list]){
5742 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5743 return -1;
5745 }else
5746 ref=0;
5747 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5748 }else
5749 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5752 for(list=0; list<h->list_count; list++){
5753 for(i=0; i<2; i++){
5754 if(IS_DIR(mb_type, i, list)){
5755 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5756 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5757 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5758 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5760 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5761 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5762 }else{
5763 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5764 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5768 }else{
5769 assert(IS_8X16(mb_type));
5770 for(list=0; list<h->list_count; list++){
5771 for(i=0; i<2; i++){
5772 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5773 int ref;
5774 if(h->ref_count[list] > 1){
5775 ref= decode_cabac_mb_ref( h, list, 4*i );
5776 if(ref >= (unsigned)h->ref_count[list]){
5777 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5778 return -1;
5780 }else
5781 ref=0;
5782 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5783 }else
5784 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5787 for(list=0; list<h->list_count; list++){
5788 for(i=0; i<2; i++){
5789 if(IS_DIR(mb_type, i, list)){
5790 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5791 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5792 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5794 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5795 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5796 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5797 }else{
5798 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5799 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5806 if( IS_INTER( mb_type ) ) {
5807 h->chroma_pred_mode_table[mb_xy] = 0;
5808 write_back_motion( h, mb_type );
5811 if( !IS_INTRA16x16( mb_type ) ) {
5812 cbp = decode_cabac_mb_cbp_luma( h );
5813 if(CHROMA)
5814 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5817 h->cbp_table[mb_xy] = h->cbp = cbp;
5819 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5820 if( decode_cabac_mb_transform_size( h ) )
5821 mb_type |= MB_TYPE_8x8DCT;
5823 s->current_picture.mb_type[mb_xy]= mb_type;
5825 if( cbp || IS_INTRA16x16( mb_type ) ) {
5826 const uint8_t *scan, *scan8x8, *dc_scan;
5827 const uint32_t *qmul;
5828 int dqp;
5830 if(IS_INTERLACED(mb_type)){
5831 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5832 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5833 dc_scan= luma_dc_field_scan;
5834 }else{
5835 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5836 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5837 dc_scan= luma_dc_zigzag_scan;
5840 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5841 if( dqp == INT_MIN ){
5842 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5843 return -1;
5845 s->qscale += dqp;
5846 if(((unsigned)s->qscale) > 51){
5847 if(s->qscale<0) s->qscale+= 52;
5848 else s->qscale-= 52;
5850 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5851 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5853 if( IS_INTRA16x16( mb_type ) ) {
5854 int i;
5855 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5856 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5858 if( cbp&15 ) {
5859 qmul = h->dequant4_coeff[0][s->qscale];
5860 for( i = 0; i < 16; i++ ) {
5861 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5862 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5864 } else {
5865 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5867 } else {
5868 int i8x8, i4x4;
5869 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5870 if( cbp & (1<<i8x8) ) {
5871 if( IS_8x8DCT(mb_type) ) {
5872 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5873 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5874 } else {
5875 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5876 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5877 const int index = 4*i8x8 + i4x4;
5878 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5879 //START_TIMER
5880 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5881 //STOP_TIMER("decode_residual")
5884 } else {
5885 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5886 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5891 if( cbp&0x30 ){
5892 int c;
5893 for( c = 0; c < 2; c++ ) {
5894 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5895 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5899 if( cbp&0x20 ) {
5900 int c, i;
5901 for( c = 0; c < 2; c++ ) {
5902 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5903 for( i = 0; i < 4; i++ ) {
5904 const int index = 16 + 4 * c + i;
5905 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5906 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5909 } else {
5910 uint8_t * const nnz= &h->non_zero_count_cache[0];
5911 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5912 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5914 } else {
5915 uint8_t * const nnz= &h->non_zero_count_cache[0];
5916 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5917 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5918 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5919 h->last_qscale_diff = 0;
5922 s->current_picture.qscale_table[mb_xy]= s->qscale;
5923 write_back_non_zero_count(h);
5925 if(MB_MBAFF){
5926 h->ref_count[0] >>= 1;
5927 h->ref_count[1] >>= 1;
5930 return 0;
5934 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5935 const int index_a = qp + h->slice_alpha_c0_offset;
5936 const int alpha = (alpha_table+52)[index_a];
5937 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5939 if( bS[0] < 4 ) {
5940 int8_t tc[4];
5941 tc[0] = (tc0_table+52)[index_a][bS[0]];
5942 tc[1] = (tc0_table+52)[index_a][bS[1]];
5943 tc[2] = (tc0_table+52)[index_a][bS[2]];
5944 tc[3] = (tc0_table+52)[index_a][bS[3]];
5945 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5946 } else {
5947 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5950 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5951 const int index_a = qp + h->slice_alpha_c0_offset;
5952 const int alpha = (alpha_table+52)[index_a];
5953 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5955 if( bS[0] < 4 ) {
5956 int8_t tc[4];
5957 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5958 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5959 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5960 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5961 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5962 } else {
5963 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the left (vertical) luma edge of an MBAFF macroblock pair.
 * Filtering is done one pixel row at a time because frame/field partners
 * may require a different bS and QP per row (8 boundary strengths and
 * 2 quantizers are supplied). This is a C implementation of the normal
 * (tc0-clipped) and strong (bS==4) filters of the spec.
 */
static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
    int i;
    for( i = 0; i < 16; i++, pix += stride) {
        int index_a;
        int alpha;
        int beta;

        int qp_index;
        int bS_index = (i >> 1);
        if (!MB_FIELD) {
            /* frame MB: rows alternate between the two field bS patterns */
            bS_index &= ~1;
            bS_index |= (i & 1);
        }

        if( bS[bS_index] == 0 ) {
            continue;
        }

        /* field MB: QP chosen by top/bottom half; frame MB: by row parity */
        qp_index = MB_FIELD ? (i >> 3) : (i & 1);
        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
        alpha = (alpha_table+52)[index_a];
        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];

        if( bS[bS_index] < 4 ) {
            /* normal filter: delta clipped to +/-tc; tc grows by one for each
             * of p1/q1 that also gets adjusted (p2/q2 close enough to p0/q0) */
            const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int p2 = pix[-3];
            const int q0 = pix[0];
            const int q1 = pix[1];
            const int q2 = pix[2];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {
                int tc = tc0;
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
            }
        }else{
            /* bS == 4: strong (intra) filter, may rewrite up to 3 pixels
             * on each side of the edge */
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int p2 = pix[-3];

            const int q0 = pix[0];
            const int q1 = pix[1];
            const int q2 = pix[2];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                    if( FFABS( p2 - p0 ) < beta)
                    {
                        const int p3 = pix[-4];
                        /* p0', p1', p2' */
                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                    } else {
                        /* p0' */
                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                    }
                    if( FFABS( q2 - q0 ) < beta)
                    {
                        const int q3 = pix[3];
                        /* q0', q1', q2' */
                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                    } else {
                        /* q0' */
                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                    }
                }else{
                    /* edge too sharp for full smoothing: p0', q0' only */
                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
            }
        }
    }
}
/**
 * Deblock the left (vertical) chroma edge of an MBAFF macroblock pair,
 * one pixel row at a time (8 boundary strengths, 2 quantizers).
 * Chroma only ever touches p0/q0 and clips against tc0 + 1.
 */
static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
    int i;
    for( i = 0; i < 8; i++, pix += stride) {
        int index_a;
        int alpha;
        int beta;

        int qp_index;
        int bS_index = i;

        if( bS[bS_index] == 0 ) {
            continue;
        }

        /* field MB: QP chosen by top/bottom half; frame MB: by row parity */
        qp_index = MB_FIELD ? (i >> 2) : (i & 1);
        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
        alpha = (alpha_table+52)[index_a];
        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];

        if( bS[bS_index] < 4 ) {
            /* normal chroma filter: single clipped delta applied to p0/q0 */
            const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int q0 = pix[0];
            const int q1 = pix[1];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {
                const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
            }
        }else{
            /* bS == 4: strong chroma filter (bilinear smoothing of p0/q0) */
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int q0 = pix[0];
            const int q1 = pix[1];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
            }
        }
    }
}
6118 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6119 const int index_a = qp + h->slice_alpha_c0_offset;
6120 const int alpha = (alpha_table+52)[index_a];
6121 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6123 if( bS[0] < 4 ) {
6124 int8_t tc[4];
6125 tc[0] = (tc0_table+52)[index_a][bS[0]];
6126 tc[1] = (tc0_table+52)[index_a][bS[1]];
6127 tc[2] = (tc0_table+52)[index_a][bS[2]];
6128 tc[3] = (tc0_table+52)[index_a][bS[3]];
6129 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6130 } else {
6131 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6135 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6136 const int index_a = qp + h->slice_alpha_c0_offset;
6137 const int alpha = (alpha_table+52)[index_a];
6138 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6140 if( bS[0] < 4 ) {
6141 int8_t tc[4];
6142 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6143 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6144 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6145 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6146 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6147 } else {
6148 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast path of the per-macroblock deblocking filter for the common
 * progressive, non-MBAFF case. Falls back to filter_mb() whenever any
 * precondition (left/top neighbours in picture, SIMD strength function
 * available, no per-plane chroma QP offset, etc.) is not met.
 */
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
    int mb_xy, mb_type;
    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;

    mb_xy = h->mb_xy;

    /* cases the fast path cannot handle -> full filter */
    if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
       !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus has to be under CODEC_FLAG2_FAST only, but should not
       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
        return;
    }
    assert(!FRAME_MBAFF);

    mb_type = s->current_picture.mb_type[mb_xy];
    qp = s->current_picture.qscale_table[mb_xy];
    qp0 = s->current_picture.qscale_table[mb_xy-1];
    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
    qpc = get_chroma_qp( h, 0, qp );
    qpc0 = get_chroma_qp( h, 0, qp0 );
    qpc1 = get_chroma_qp( h, 0, qp1 );
    /* MB-boundary QPs are the rounded average of the two adjacent MBs */
    qp0 = (qp + qp0 + 1) >> 1;
    qp1 = (qp + qp1 + 1) >> 1;
    qpc0 = (qpc + qpc0 + 1) >> 1;
    qpc1 = (qpc + qpc1 + 1) >> 1;
    qp_thresh = 15 - h->slice_alpha_c0_offset;
    /* at or below this QP the filter tables are all zero: nothing to do */
    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
        return;

    if( IS_INTRA(mb_type) ) {
        /* intra MB: bS is fixed (4 on MB boundaries, 3 on inner edges;
         * 3 on the top boundary too in field pictures) */
        int16_t bS4[4] = {4,4,4,4};
        int16_t bS3[4] = {3,3,3,3};
        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
        if( IS_8x8DCT(mb_type) ) {
            /* 8x8 transform: only MB boundary + middle edges exist */
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
        } else {
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
        }
        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
        return;
    } else {
        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
        /* bSv aliases bS so a whole 4-entry edge can be set/tested as one
         * 64-bit word (4 x int16 packed) */
        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
        int edges;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            edges = 4;
            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
        } else {
            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             ? 3 : 0;
            int step = IS_8x8DCT(mb_type) ? 2 : 1;
            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
            /* SIMD/DSP routine derives all bS values from nnz/ref/mv caches */
            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                              (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
        }
        /* intra neighbours force bS 4 (3 vertically in field pictures) */
        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
            bSv[0][0] = 0x0004000400040004ULL;
        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;

#define FILTER(hv,dir,edge)\
        if(bSv[dir][edge]) {\
            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
            if(!(edge&1)) {\
                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
            }\
        }
        if( edges == 1 ) {
            FILTER(v,0,0);
            FILTER(h,1,0);
        } else if( IS_8x8DCT(mb_type) ) {
            FILTER(v,0,0);
            FILTER(v,0,2);
            FILTER(h,1,0);
            FILTER(h,1,2);
        } else {
            FILTER(v,0,0);
            FILTER(v,0,1);
            FILTER(v,0,2);
            FILTER(v,0,3);
            FILTER(h,1,0);
            FILTER(h,1,1);
            FILTER(h,1,2);
            FILTER(h,1,3);
        }
#undef FILTER
    }
}
/**
 * Compute boundary strengths and filter all edges of one macroblock in a
 * single direction (dir==0: vertical edges, dir==1: horizontal edges).
 * Called from filter_mb() once per direction.
 */
static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
    MpegEncContext * const s = &h->s;
    int edge;
    /* neighbour on the other side of edge 0 (left for dir 0, top for dir 1) */
    const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
    const int mbm_type = s->current_picture.mb_type[mbm_xy];
    /* maps of per-list reference index -> frame number, for this MB's slice
     * and for the neighbour's slice (they may differ across slices) */
    int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
    int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
    /* neighbour outside the picture -> skip the MB-boundary edge */
    int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;

    /* skipped 16x16 MBs have no non-zero coefficients or mv differences
     * inside, so only the MB-boundary edge can need filtering */
    const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
                                == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
    // how often to recheck mv-based bS when iterating between edges
    const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
                          (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
    // how often to recheck mv-based bS when iterating along each edge
    const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));

    if (first_vertical_edge_done) {
        /* MBAFF left edge already filtered by the caller */
        start = 1;
    }

    /* deblocking_filter == 2: do not filter across slice boundaries */
    if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
        start = 1;

    if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
        && !IS_INTERLACED(mb_type)
        && IS_INTERLACED(mbm_type)
        ) {
        // This is a special case in the norm where the filtering must
        // be done twice (once for each field) even if we are in a
        // frame macroblock.
        static const int nnz_idx[4] = {4,5,6,3};
        unsigned int tmp_linesize   = 2 *   linesize;
        unsigned int tmp_uvlinesize = 2 * uvlinesize;
        int mbn_xy = mb_xy - 2 * s->mb_stride;
        int qp;
        int i, j;
        int16_t bS[4];

        for(j=0; j<2; j++, mbn_xy += s->mb_stride){
            if( IS_INTRA(mb_type) ||
                IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
                bS[0] = bS[1] = bS[2] = bS[3] = 3;
            } else {
                const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
                for( i = 0; i < 4; i++ ) {
                    /* bS 2 if either side has non-zero coefficients */
                    if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
                        mbn_nnz[nnz_idx[i]] != 0 )
                        bS[i] = 2;
                    else
                        bS[i] = 1;
                }
            }
            // Do not use s->qscale as luma quantizer because it has not the same
            // value in IPCM macroblocks.
            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
            tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
            { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
            filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
            filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
                              ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
                              ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
        }

        start = 1;
    }

    /* Calculate bS */
    for( edge = start; edge < edges; edge++ ) {
        /* mbn_xy: neighbor macroblock */
        const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
        const int mbn_type = s->current_picture.mb_type[mbn_xy];
        int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
        int16_t bS[4];
        int qp;

        /* 8x8 transform: odd (4x4-internal) edges do not exist */
        if( (edge&1) && IS_8x8DCT(mb_type) )
            continue;

        if( IS_INTRA(mb_type) ||
            IS_INTRA(mbn_type) ) {
            int value;
            if (edge == 0) {
                /* vertical MB-boundary edges involving an intra MB are
                 * bS 4; horizontal ones are softened to 3 in field coding */
                if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
                    || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                ) {
                    value = 4;
                } else {
                    value = 3;
                }
            } else {
                value = 3;
            }
            bS[0] = bS[1] = bS[2] = bS[3] = value;
        } else {
            int i, l;
            int mv_done;

            if( edge & mask_edge ) {
                /* partitioning guarantees no mv/ref change across this edge */
                bS[0] = bS[1] = bS[2] = bS[3] = 0;
                mv_done = 1;
            }
            else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
                /* mixed frame/field pair: spec fixes bS at 1 */
                bS[0] = bS[1] = bS[2] = bS[3] = 1;
                mv_done = 1;
            }
            else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
                /* mv/ref are constant along the whole edge: one comparison
                 * gives bS for all four positions */
                int b_idx= 8 + 4 + edge * (dir ? 8:1);
                int bn_idx= b_idx - (dir ? 8:1);
                int v = 0;

                for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
                    v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
                         FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                         FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
                }

                /* B slices: also try matching list 0 against list 1 */
                if(h->slice_type_nos == FF_B_TYPE && v){
                    v=0;
                    for( l = 0; !v && l < 2; l++ ) {
                        int ln= 1-l;
                        v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
                    }
                }

                bS[0] = bS[1] = bS[2] = bS[3] = v;
                mv_done = 1;
            }
            else
                mv_done = 0;

            for( i = 0; i < 4; i++ ) {
                int x = dir == 0 ? edge : i;
                int y = dir == 0 ? i    : edge;
                int b_idx= 8 + 4 + x + 8*y;
                int bn_idx= b_idx - (dir ? 8:1);

                /* non-zero coefficients on either side: bS 2 regardless of mv */
                if( h->non_zero_count_cache[b_idx] |
                    h->non_zero_count_cache[bn_idx] ) {
                    bS[i] = 2;
                }
                else if(!mv_done)
                {
                    bS[i] = 0;
                    for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
                        if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
                            FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                            FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
                            bS[i] = 1;
                            break;
                        }
                    }

                    if(h->slice_type_nos == FF_B_TYPE && bS[i]){
                        bS[i] = 0;
                        for( l = 0; l < 2; l++ ) {
                            int ln= 1-l;
                            if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
                                bS[i] = 1;
                                break;
                            }
                        }
                    }
                }
            }

            if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
                continue;
        }

        /* Filter edge */
        // Do not use s->qscale as luma quantizer because it has not the same
        // value in IPCM macroblocks.
        qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
        //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
        { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
        if( dir == 0 ) {
            filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
            /* chroma has half the edges of luma (4:2:0) */
            if( (edge&1) == 0 ) {
                filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            }
        } else {
            filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
            if( (edge&1) == 0 ) {
                filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            }
        }
    }
}
/**
 * Apply the full in-loop deblocking filter to one macroblock: handles the
 * MBAFF mixed frame/field left edge specially, then delegates vertical and
 * horizontal edge filtering to filter_mb_dir().
 */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    /* interlaced MBs use a tighter vertical mv threshold (half-pel fields) */
    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
    int first_vertical_edge_done = 0;
    av_unused int dir;

    //for sufficiently low qp, filtering wouldn't do anything
    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
    if(!FRAME_MBAFF){
        int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
           && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
            return;
        }
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!h->pps.cabac && h->pps.transform_8x8_mode){
        int top_type, left_type[2];
        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];

        /* rebuild the nnz cache entries from the coded block pattern so the
         * filter sees per-8x8-block "coefficients present" flags */
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
            h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
        }
    }

    if (FRAME_MBAFF
        // left mb is in picture
        && h->slice_table[mb_xy-1] != 0xFFFF
        // and current and left pair do not have the same interlaced type
        && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
        // and left mb is in the same slice if deblocking_filter == 2
        && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
        /* First vertical edge is different in MBAFF frames
         * There are 8 different bS to compute and 2 different Qp
         */
        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
        int16_t bS[8];
        int qp[2];
        int bqp[2];
        int rqp[2];
        int mb_qp, mbn0_qp, mbn1_qp;
        int i;
        first_vertical_edge_done = 1;

        if( IS_INTRA(mb_type) )
            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
        else {
            for( i = 0; i < 8; i++ ) {
                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];

                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
                    bS[i] = 4;
                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
                         /* CAVLC + 8x8dct stores nnz differently: read cbp instead */
                         ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
                            (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
                            :
                            h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
                    bS[i] = 2;
                else
                    bS[i] = 1;
            }
        }

        mb_qp = s->current_picture.qscale_table[mb_xy];
        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
        /* per-neighbour averaged luma (qp), Cb (bqp) and Cr (rqp) quantizers */
        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;

        /* Filter edge */
        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
        { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
    }

#if CONFIG_SMALL
    for( dir = 0; dir < 2; dir++ )
        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
#else
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
#endif
}
/**
 * Decode the macroblocks of one slice.
 *
 * This is the per-slice worker, also used as the entry point for
 * avctx->execute() when slice threading is active.
 *
 * @param avctx codec context
 * @param arg   pointer to the per-thread H264Context pointer
 * @return 0 when the slice ended cleanly, -1 on a decode error
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    /* For data-partitioned slices only the AC status bits are reported from
     * here; the DC/MV partitions are accounted for elsewhere. */
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

    s->mb_skip_run= -1;

    /* Use the slow, fully general MB reconstruction path whenever a feature
     * the fast path cannot handle is in use. */
    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        int i;

        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
        /* calculate pre-state: per-context qscale-dependent initialization
         * from the (m,n) init tables */
        for( i= 0; i < 460; i++ ) {
            int pre;
            if( h->slice_type_nos == FF_I_TYPE )
                pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
            else
                pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );

            /* pack (state, MPS) into one byte: even => MPS 0, odd => MPS 1 */
            if( pre <= 63 )
                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
            else
                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
        }

        for(;;){
//START_TIMER
            int ret = decode_mb_cabac(h);
            int eos;
//STOP_TIMER("decode_mb_cabac")

            if(ret>=0) hl_decode_mb(h);

            /* In MBAFF frames both macroblocks of a pair are decoded here. */
            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                ret = decode_mb_cabac(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* overreading the CABAC bytestream by more than 2 bytes means
             * corrupt input */
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if( ++s->mb_x >= s->mb_width ) {
                s->mb_x = 0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    /* field/MBAFF pictures advance two mb rows at a time */
                    ++s->mb_y;
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                return 0;
            }
        }

    } else {
        for(;;){
            int ret = decode_mb_cavlc(h);

            if(ret>=0) hl_decode_mb(h);

            /* second macroblock of an MBAFF pair */
            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = decode_mb_cavlc(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* reaching the last row exactly at the end of the
                     * bitstream is a clean slice end; anything else is an
                     * error */
                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            /* a pending mb_skip_run may legitimately extend to the buffer
             * end, hence the extra condition */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
    }

#if 0 /* dead reference implementation, kept for documentation only */
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret= decode_mb(h);

            hl_decode_mb(h);

            if(ret<0){
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            if(get_bits_count(s->gb) >= s->gb.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
        s->mb_x=0;
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
    }
#endif
    return -1; //not reached
}
/**
 * Parse a picture timing SEI payload (H.264 Annex D).
 *
 * Only h->sei_pic_struct is retained; all timestamp fields are skipped
 * bit-exactly to keep the reader in sync.
 *
 * @return 0 on success, -1 if pic_struct is out of range
 */
static int decode_picture_timing(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* The delay fields are only present when HRD parameters were signalled
     * in the SPS; their bit widths were stored by decode_hrd_parameters(). */
    if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
        skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
        skip_bits(&s->gb, h->sps.dpb_output_delay_length);  /* dpb_output_delay */
    }
    if(h->sps.pic_struct_present_flag){
        unsigned int i, num_clock_ts;
        h->sei_pic_struct = get_bits(&s->gb, 4);

        if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
            return -1;

        /* number of clock timestamps depends on pic_struct (1..3) */
        num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];

        for (i = 0 ; i < num_clock_ts ; i++){
            if(get_bits(&s->gb, 1)){                      /* clock_timestamp_flag */
                unsigned int full_timestamp_flag;
                skip_bits(&s->gb, 2);                     /* ct_type */
                skip_bits(&s->gb, 1);                     /* nuit_field_based_flag */
                skip_bits(&s->gb, 5);                     /* counting_type */
                full_timestamp_flag = get_bits(&s->gb, 1);
                skip_bits(&s->gb, 1);                     /* discontinuity_flag */
                skip_bits(&s->gb, 1);                     /* cnt_dropped_flag */
                skip_bits(&s->gb, 8);                     /* n_frames */
                if(full_timestamp_flag){
                    skip_bits(&s->gb, 6);                 /* seconds_value 0..59 */
                    skip_bits(&s->gb, 6);                 /* minutes_value 0..59 */
                    skip_bits(&s->gb, 5);                 /* hours_value 0..23 */
                }else{
                    /* optional nested seconds/minutes/hours */
                    if(get_bits(&s->gb, 1)){              /* seconds_flag */
                        skip_bits(&s->gb, 6);             /* seconds_value range 0..59 */
                        if(get_bits(&s->gb, 1)){          /* minutes_flag */
                            skip_bits(&s->gb, 6);         /* minutes_value 0..59 */
                            if(get_bits(&s->gb, 1))       /* hours_flag */
                                skip_bits(&s->gb, 5);     /* hours_value 0..23 */
                        }
                    }
                }
                if(h->sps.time_offset_length > 0)
                    skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
            }
        }
    }
    return 0;
}
/**
 * Parse an "unregistered user data" SEI payload (type 5).
 *
 * The first 16 bytes are the UUID; if the text following it identifies an
 * x264 encoder, the build number is stored in h->x264_build (used elsewhere
 * for bug workarounds). The remainder of the payload is skipped.
 *
 * @param size payload size in bytes
 * @return 0 on success, -1 if the payload is too short to hold a UUID
 */
static int decode_unregistered_user_data(H264Context *h, int size){
    MpegEncContext * const s = &h->s;
    uint8_t user_data[16+256];
    int e, build, i;

    if(size<16)
        return -1;

    /* copy at most sizeof(user_data)-1 bytes, leaving room for the NUL */
    for(i=0; i<sizeof(user_data)-1 && i<size; i++){
        user_data[i]= get_bits(&s->gb, 8);
    }

    user_data[i]= 0;
    /* skip the 16-byte UUID, then look for the x264 version banner */
    e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
    if(e==1 && build>=0)
        h->x264_build= build;

    if(s->avctx->debug & FF_DEBUG_BUGS)
        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);

    /* consume whatever part of the payload did not fit into user_data */
    for(; i<size; i++)
        skip_bits(&s->gb, 8);

    return 0;
}
6851 static int decode_sei(H264Context *h){
6852 MpegEncContext * const s = &h->s;
6854 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6855 int size, type;
6857 type=0;
6859 type+= show_bits(&s->gb, 8);
6860 }while(get_bits(&s->gb, 8) == 255);
6862 size=0;
6864 size+= show_bits(&s->gb, 8);
6865 }while(get_bits(&s->gb, 8) == 255);
6867 switch(type){
6868 case 1: // Picture timing SEI
6869 if(decode_picture_timing(h) < 0)
6870 return -1;
6871 break;
6872 case 5:
6873 if(decode_unregistered_user_data(h, size) < 0)
6874 return -1;
6875 break;
6876 default:
6877 skip_bits(&s->gb, 8*size);
6880 //FIXME check bits here
6881 align_get_bits(&s->gb);
6884 return 0;
/**
 * Parse an hrd_parameters() structure (H.264 Annex E).
 *
 * Most fields are read only to keep the bitstream reader in sync; the three
 * delay/offset lengths are stored in the SPS for later use by the picture
 * timing SEI parser.
 *
 * @return 0 on success, -1 if cpb_cnt is out of range
 */
static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int cpb_count, i;
    cpb_count = get_ue_golomb_31(&s->gb) + 1;

    /* unsigned compare also rejects a negative overflowed value */
    if(cpb_count > 32U){
        av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
        return -1;
    }

    get_bits(&s->gb, 4); /* bit_rate_scale */
    get_bits(&s->gb, 4); /* cpb_size_scale */
    for(i=0; i<cpb_count; i++){
        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
        get_bits1(&s->gb);     /* cbr_flag */
    }
    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
    sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
    sps->dpb_output_delay_length  = get_bits(&s->gb, 5) + 1;
    sps->time_offset_length = get_bits(&s->gb, 5);
    return 0;
}
/**
 * Parse the VUI (Video Usability Information) part of an SPS.
 *
 * Stores sample aspect ratio, timing info, HRD flags, pic_struct presence
 * and bitstream restriction data into *sps; everything else is read and
 * discarded to keep the reader in sync.
 *
 * @return 0 on success, -1 on an invalid aspect ratio, HRD structure or
 *         num_reorder_frames
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag;
    unsigned int aspect_ratio_idc;

    aspect_ratio_info_present_flag= get_bits1(&s->gb);

    if( aspect_ratio_info_present_flag ) {
        aspect_ratio_idc= get_bits(&s->gb, 8);
        if( aspect_ratio_idc == EXTENDED_SAR ) {
            /* explicit numerator/denominator follow */
            sps->sar.num= get_bits(&s->gb, 16);
            sps->sar.den= get_bits(&s->gb, 16);
        }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
            /* table lookup for the predefined SAR codes */
            sps->sar= pixel_aspect[aspect_ratio_idc];
        }else{
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
            return -1;
        }
    }else{
        sps->sar.num=
        sps->sar.den= 0;
    }
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);

    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
        get_bits1(&s->gb);      /* overscan_appropriate_flag */
    }

    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
        get_bits(&s->gb, 3);    /* video_format */
        get_bits1(&s->gb);      /* video_full_range_flag */
        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
            get_bits(&s->gb, 8); /* colour_primaries */
            get_bits(&s->gb, 8); /* transfer_characteristics */
            get_bits(&s->gb, 8); /* matrix_coefficients */
        }
    }

    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
    }

    sps->timing_info_present_flag = get_bits1(&s->gb);
    if(sps->timing_info_present_flag){
        /* NOTE(review): num_units_in_tick/time_scale are not checked for 0
         * here — a later division by them would be undefined; confirm the
         * consumers guard against that. */
        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
        sps->time_scale = get_bits_long(&s->gb, 32);
        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
    }

    sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(sps->nal_hrd_parameters_present_flag)
        if(decode_hrd_parameters(h, sps) < 0)
            return -1;
    sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(sps->vcl_hrd_parameters_present_flag)
        if(decode_hrd_parameters(h, sps) < 0)
            return -1;
    if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
        get_bits1(&s->gb);     /* low_delay_hrd_flag */
    sps->pic_struct_present_flag = get_bits1(&s->gb);

    sps->bitstream_restriction_flag = get_bits1(&s->gb);
    if(sps->bitstream_restriction_flag){
        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
        sps->num_reorder_frames= get_ue_golomb(&s->gb);
        get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/

        if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
            return -1;
        }
    }

    return 0;
}
/**
 * Decode one scaling list (4x4 or 8x8) from the bitstream.
 *
 * Three outcomes are possible per the spec's scaling_list() semantics:
 *  - flag not set: copy fallback_list (prediction from the previous list),
 *  - first delta yields next==0: copy the JVT default list,
 *  - otherwise: delta-decode the values in zigzag order, repeating the last
 *    value once a delta brings 'next' to 0.
 *
 * @param factors       output array of 'size' entries (raster order)
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec default list used when signalled
 * @param fallback_list list to copy when the matrix is not transmitted
 */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
                                const uint8_t *jvt_list, const uint8_t *fallback_list){
    MpegEncContext * const s = &h->s;
    int i, last = 8, next = 8;
    const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
        memcpy(factors, fallback_list, size*sizeof(uint8_t));
    else
    for(i=0;i<size;i++){
        /* once next reaches 0 the remaining entries repeat 'last' */
        if(next)
            next = (last + get_se_golomb(&s->gb)) & 0xff;
        if(!i && !next){ /* matrix not written, we use the preset one */
            memcpy(factors, jvt_list, size*sizeof(uint8_t));
            break;
        }
        last = factors[scan[i]] = next ? next : last;
    }
}
/**
 * Decode the full set of scaling matrices of an SPS or PPS.
 *
 * Fallback rules per the spec: the first list of each class falls back to
 * the SPS matrix (when decoding a PPS whose SPS carried matrices) or to the
 * defaults; each subsequent list falls back to the previously decoded one.
 * List order follows H.264 Table 7-2: Intra Y/Cb/Cr then Inter Y/Cb/Cr.
 *
 * @param is_sps nonzero when called for an SPS, 0 for a PPS
 */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
    MpegEncContext * const s = &h->s;
    int fallback_sps = !is_sps && sps->scaling_matrix_present;
    const uint8_t *fallback[4] = {
        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
    };
    if(get_bits1(&s->gb)){
        sps->scaling_matrix_present |= is_sps;
        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]);        // Intra, Y
        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cb
        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cr
        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]);        // Inter, Y
        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cb
        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cr
        /* 8x8 lists are only present when 8x8 transform can be used */
        if(is_sps || pps->transform_8x8_mode){
            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
        }
    }
}
7036 static inline int decode_seq_parameter_set(H264Context *h){
7037 MpegEncContext * const s = &h->s;
7038 int profile_idc, level_idc;
7039 unsigned int sps_id;
7040 int i;
7041 SPS *sps;
7043 profile_idc= get_bits(&s->gb, 8);
7044 get_bits1(&s->gb); //constraint_set0_flag
7045 get_bits1(&s->gb); //constraint_set1_flag
7046 get_bits1(&s->gb); //constraint_set2_flag
7047 get_bits1(&s->gb); //constraint_set3_flag
7048 get_bits(&s->gb, 4); // reserved
7049 level_idc= get_bits(&s->gb, 8);
7050 sps_id= get_ue_golomb_31(&s->gb);
7052 if(sps_id >= MAX_SPS_COUNT) {
7053 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7054 return -1;
7056 sps= av_mallocz(sizeof(SPS));
7057 if(sps == NULL)
7058 return -1;
7060 sps->profile_idc= profile_idc;
7061 sps->level_idc= level_idc;
7063 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7064 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7065 sps->scaling_matrix_present = 0;
7067 if(sps->profile_idc >= 100){ //high profile
7068 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7069 if(sps->chroma_format_idc == 3)
7070 sps->residual_color_transform_flag = get_bits1(&s->gb);
7071 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7072 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7073 sps->transform_bypass = get_bits1(&s->gb);
7074 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7075 }else{
7076 sps->chroma_format_idc= 1;
7079 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7080 sps->poc_type= get_ue_golomb_31(&s->gb);
7082 if(sps->poc_type == 0){ //FIXME #define
7083 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7084 } else if(sps->poc_type == 1){//FIXME #define
7085 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7086 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7087 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7088 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7090 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7091 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7092 goto fail;
7095 for(i=0; i<sps->poc_cycle_length; i++)
7096 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7097 }else if(sps->poc_type != 2){
7098 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7099 goto fail;
7102 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7103 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7104 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7105 goto fail;
7107 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7108 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7109 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7110 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7111 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7112 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7113 goto fail;
7116 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7117 if(!sps->frame_mbs_only_flag)
7118 sps->mb_aff= get_bits1(&s->gb);
7119 else
7120 sps->mb_aff= 0;
7122 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7124 #ifndef ALLOW_INTERLACE
7125 if(sps->mb_aff)
7126 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7127 #endif
7128 sps->crop= get_bits1(&s->gb);
7129 if(sps->crop){
7130 sps->crop_left = get_ue_golomb(&s->gb);
7131 sps->crop_right = get_ue_golomb(&s->gb);
7132 sps->crop_top = get_ue_golomb(&s->gb);
7133 sps->crop_bottom= get_ue_golomb(&s->gb);
7134 if(sps->crop_left || sps->crop_top){
7135 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7137 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7138 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7140 }else{
7141 sps->crop_left =
7142 sps->crop_right =
7143 sps->crop_top =
7144 sps->crop_bottom= 0;
7147 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7148 if( sps->vui_parameters_present_flag )
7149 decode_vui_parameters(h, sps);
7151 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7152 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7153 sps_id, sps->profile_idc, sps->level_idc,
7154 sps->poc_type,
7155 sps->ref_frame_count,
7156 sps->mb_width, sps->mb_height,
7157 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7158 sps->direct_8x8_inference_flag ? "8B8" : "",
7159 sps->crop_left, sps->crop_right,
7160 sps->crop_top, sps->crop_bottom,
7161 sps->vui_parameters_present_flag ? "VUI" : "",
7162 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7166 av_free(h->sps_buffers[sps_id]);
7167 h->sps_buffers[sps_id]= sps;
7168 return 0;
7169 fail:
7170 av_free(sps);
7171 return -1;
7174 static void
7175 build_qp_table(PPS *pps, int t, int index)
7177 int i;
7178 for(i = 0; i < 52; i++)
7179 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) NAL unit and register it in
 * h->pps_buffers[pps_id], replacing any previous PPS with the same id.
 *
 * @param bit_length length of the RBSP in bits, used to detect the optional
 *                   trailing (transform_8x8 / scaling matrix / second chroma
 *                   QP offset) fields
 * @return 0 on success, -1 on failure (the allocated PPS is freed)
 */
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
    MpegEncContext * const s = &h->s;
    unsigned int pps_id= get_ue_golomb(&s->gb);
    PPS *pps;

    if(pps_id >= MAX_PPS_COUNT) {
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
        return -1;
    }

    pps= av_mallocz(sizeof(PPS));
    if(pps == NULL)
        return -1;
    pps->sps_id= get_ue_golomb_31(&s->gb);
    /* the referenced SPS must already have been decoded */
    if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
        goto fail;
    }

    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        /* FMO slice group maps are parsed for the map type only; the map
         * data itself (see the spec excerpts below) is NOT consumed, so
         * FMO streams will desync — hence the error log. */
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
        switch(pps->mb_slice_group_map_type){
        case 0:
#if 0
|   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
|    run_length[ i ]                                |1  |ue(v) |
#endif
            break;
        case 2:
#if 0
|   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |      |
|{                                                  |   |      |
|    top_left_mb[ i ]                               |1  |ue(v) |
|    bottom_right_mb[ i ]                           |1  |ue(v) |
|   }                                               |   |      |
#endif
            break;
        case 3:
        case 4:
        case 5:
#if 0
|   slice_group_change_direction_flag               |1  |u(1)  |
|   slice_group_change_rate_minus1                  |1  |ue(v) |
#endif
            break;
        case 6:
#if 0
|   slice_group_id_cnt_minus1                       |1  |ue(v) |
|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
|)                                                  |   |      |
|    slice_group_id[ i ]                            |1  |u(v)  |
#endif
            break;
        }
    }
    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
        goto fail;
    }

    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);

    pps->transform_8x8_mode= 0;
    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
    /* start from the SPS matrices; may be overridden below */
    memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
    memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));

    /* optional trailing fields, present only if bits remain in the RBSP */
    if(get_bits_count(&s->gb) < bit_length){
        pps->transform_8x8_mode= get_bits1(&s->gb);
        decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
        pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
    } else {
        pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
    }

    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
    /* NOTE(review): this sets the flag on the currently *active* PPS
     * (h->pps), not on the PPS being parsed here — looks like it should be
     * pps->chroma_qp_diff; confirm before changing, as decode_slice_header
     * copies *pps into h->pps. */
    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
        h->pps.chroma_qp_diff= 1;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->transform_8x8_mode ? "8x8DCT" : ""
               );
    }

    av_free(h->pps_buffers[pps_id]);
    h->pps_buffers[pps_id]= pps;
    return 0;
fail:
    av_free(pps);
    return -1;
}
/**
 * Call decode_slice() for each context.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    /* VDPAU hardware decoding: slice data was already handed to the
     * accelerator, nothing to run here */
    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        return;
    if(context_count == 1) {
        /* single context: decode directly on the calling thread */
        decode_slice(avctx, &h);
    } else {
        /* contexts 1..n-1 are worker copies; sync their error settings */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;
        }

        avctx->execute(avctx, (void *)decode_slice,
                       (void **)h->thread_context, NULL, context_count, sizeof(void*));

        /* pull back stuff from slices to master context */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
}
7336 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7337 MpegEncContext * const s = &h->s;
7338 AVCodecContext * const avctx= s->avctx;
7339 int buf_index=0;
7340 H264Context *hx; ///< thread context
7341 int context_count = 0;
7343 h->max_contexts = avctx->thread_count;
7344 #if 0
7345 int i;
7346 for(i=0; i<50; i++){
7347 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7349 #endif
7350 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7351 h->current_slice = 0;
7352 if (!s->first_field)
7353 s->current_picture_ptr= NULL;
7356 for(;;){
7357 int consumed;
7358 int dst_length;
7359 int bit_length;
7360 const uint8_t *ptr;
7361 int i, nalsize = 0;
7362 int err;
7364 if(h->is_avc) {
7365 if(buf_index >= buf_size) break;
7366 nalsize = 0;
7367 for(i = 0; i < h->nal_length_size; i++)
7368 nalsize = (nalsize << 8) | buf[buf_index++];
7369 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7370 if(nalsize == 1){
7371 buf_index++;
7372 continue;
7373 }else{
7374 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7375 break;
7378 } else {
7379 // start code prefix search
7380 for(; buf_index + 3 < buf_size; buf_index++){
7381 // This should always succeed in the first iteration.
7382 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7383 break;
7386 if(buf_index+3 >= buf_size) break;
7388 buf_index+=3;
7391 hx = h->thread_context[context_count];
7393 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7394 if (ptr==NULL || dst_length < 0){
7395 return -1;
7397 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7398 dst_length--;
7399 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7401 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7402 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7405 if (h->is_avc && (nalsize != consumed)){
7406 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7407 consumed= nalsize;
7410 buf_index += consumed;
7412 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7413 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7414 continue;
7416 again:
7417 err = 0;
7418 switch(hx->nal_unit_type){
7419 case NAL_IDR_SLICE:
7420 if (h->nal_unit_type != NAL_IDR_SLICE) {
7421 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7422 return -1;
7424 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7425 case NAL_SLICE:
7426 init_get_bits(&hx->s.gb, ptr, bit_length);
7427 hx->intra_gb_ptr=
7428 hx->inter_gb_ptr= &hx->s.gb;
7429 hx->s.data_partitioning = 0;
7431 if((err = decode_slice_header(hx, h)))
7432 break;
7434 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7435 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7436 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7437 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7438 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7439 && avctx->skip_frame < AVDISCARD_ALL){
7440 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7441 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7442 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7443 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7444 }else
7445 context_count++;
7447 break;
7448 case NAL_DPA:
7449 init_get_bits(&hx->s.gb, ptr, bit_length);
7450 hx->intra_gb_ptr=
7451 hx->inter_gb_ptr= NULL;
7452 hx->s.data_partitioning = 1;
7454 err = decode_slice_header(hx, h);
7455 break;
7456 case NAL_DPB:
7457 init_get_bits(&hx->intra_gb, ptr, bit_length);
7458 hx->intra_gb_ptr= &hx->intra_gb;
7459 break;
7460 case NAL_DPC:
7461 init_get_bits(&hx->inter_gb, ptr, bit_length);
7462 hx->inter_gb_ptr= &hx->inter_gb;
7464 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7465 && s->context_initialized
7466 && s->hurry_up < 5
7467 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7468 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7469 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7470 && avctx->skip_frame < AVDISCARD_ALL)
7471 context_count++;
7472 break;
7473 case NAL_SEI:
7474 init_get_bits(&s->gb, ptr, bit_length);
7475 decode_sei(h);
7476 break;
7477 case NAL_SPS:
7478 init_get_bits(&s->gb, ptr, bit_length);
7479 decode_seq_parameter_set(h);
7481 if(s->flags& CODEC_FLAG_LOW_DELAY)
7482 s->low_delay=1;
7484 if(avctx->has_b_frames < 2)
7485 avctx->has_b_frames= !s->low_delay;
7486 break;
7487 case NAL_PPS:
7488 init_get_bits(&s->gb, ptr, bit_length);
7490 decode_picture_parameter_set(h, bit_length);
7492 break;
7493 case NAL_AUD:
7494 case NAL_END_SEQUENCE:
7495 case NAL_END_STREAM:
7496 case NAL_FILLER_DATA:
7497 case NAL_SPS_EXT:
7498 case NAL_AUXILIARY_SLICE:
7499 break;
7500 default:
7501 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7504 if(context_count == h->max_contexts) {
7505 execute_decode_slices(h, context_count);
7506 context_count = 0;
7509 if (err < 0)
7510 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7511 else if(err == 1) {
7512 /* Slice could not be decoded in parallel mode, copy down
7513 * NAL unit stuff to context 0 and restart. Note that
7514 * rbsp_buffer is not transferred, but since we no longer
7515 * run in parallel mode this should not be an issue. */
7516 h->nal_unit_type = hx->nal_unit_type;
7517 h->nal_ref_idc = hx->nal_ref_idc;
7518 hx = h;
7519 goto again;
7522 if(context_count)
7523 execute_decode_slices(h, context_count);
7524 return buf_index;
/**
 * returns the number of bytes consumed for building the current frame
 */
7530 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7531 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7532 if(pos+10>buf_size) pos=buf_size; // oops ;)
7534 return pos;
/**
 * Decode one chunk of an H.264 elementary stream and output a completed
 * frame when one becomes available.
 *
 * @param avctx     codec context; avctx->priv_data is the H264Context
 * @param data      AVFrame* output slot
 * @param data_size set to sizeof(AVFrame) when *data holds a picture, else 0
 * @param buf       input bitstream bytes; buf_size == 0 signals end of stream
 * @param buf_size  number of valid bytes in buf
 * @return number of input bytes consumed, or -1 on error
 */
7537 static int decode_frame(AVCodecContext *avctx,
7538 void *data, int *data_size,
7539 const uint8_t *buf, int buf_size)
7541 H264Context *h = avctx->priv_data;
7542 MpegEncContext *s = &h->s;
7543 AVFrame *pict = data;
7544 int buf_index;
7546 s->flags= avctx->flags;
7547 s->flags2= avctx->flags2;
7549 /* end of stream, output what is still in the buffers */
7550 if (buf_size == 0) {
7551 Picture *out;
7552 int i, out_idx;
7554 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC, not scanning past a
 * key frame (or a POC of 0, which marks an IDR boundary). */
7555 out = h->delayed_pic[0];
7556 out_idx = 0;
7557 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7558 if(h->delayed_pic[i]->poc < out->poc){
7559 out = h->delayed_pic[i];
7560 out_idx = i;
/* Remove the chosen picture from the delay buffer. */
7563 for(i=out_idx; h->delayed_pic[i]; i++)
7564 h->delayed_pic[i] = h->delayed_pic[i+1];
7566 if(out){
7567 *data_size = sizeof(AVFrame);
7568 *pict= *(AVFrame*)out;
7571 return 0;
/* AVC ("avcC") format input, first call: the SPS/PPS NAL units are
 * stored inside avctx->extradata and must be decoded once up front. */
7574 if(h->is_avc && !h->got_avcC) {
7575 int i, cnt, nalsize;
7576 unsigned char *p = avctx->extradata;
7577 if(avctx->extradata_size < 7) {
7578 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7579 return -1;
7581 if(*p != 1) {
7582 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7583 return -1;
7585 /* sps and pps in the avcC always have length coded with 2 bytes,
7586 so put a fake nal_length_size = 2 while parsing them */
7587 h->nal_length_size = 2;
7588 // Decode sps from avcC
7589 cnt = *(p+5) & 0x1f; // Number of sps
7590 p += 6;
7591 for (i = 0; i < cnt; i++) {
7592 nalsize = AV_RB16(p) + 2;
7593 if(decode_nal_units(h, p, nalsize) < 0) {
7594 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7595 return -1;
7597 p += nalsize;
7599 // Decode pps from avcC
7600 cnt = *(p++); // Number of pps
7601 for (i = 0; i < cnt; i++) {
7602 nalsize = AV_RB16(p) + 2;
7603 if(decode_nal_units(h, p, nalsize) != nalsize) {
7604 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7605 return -1;
7607 p += nalsize;
7609 // Now store right nal length size, that will be use to parse all other nals
7610 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7611 // Do not reparse avcC
7612 h->got_avcC = 1;
/* Annex-B style global header: feed it through the NAL decoder once. */
7615 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7616 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7617 return -1;
7618 h->got_avcC = 1;
/* Decode the NAL units of this input chunk. */
7621 buf_index=decode_nal_units(h, buf, buf_size);
7622 if(buf_index < 0)
7623 return -1;
7625 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7626 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7627 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7628 return -1;
/* The frame is complete (whole-frame mode, or chunked mode that reached
 * the last macroblock row): finish it and decide what to output. */
7631 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7632 Picture *out = s->current_picture_ptr;
7633 Picture *cur = s->current_picture_ptr;
7634 int i, pics, cross_idr, out_of_order, out_idx;
7636 s->mb_y= 0;
7638 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7639 s->current_picture_ptr->pict_type= s->pict_type;
7641 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7642 ff_vdpau_h264_set_reference_frames(s);
/* Reference-picture marking and POC bookkeeping, skipped for droppable
 * (non-reference) pictures. */
7644 if(!s->dropable) {
7645 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7646 h->prev_poc_msb= h->poc_msb;
7647 h->prev_poc_lsb= h->poc_lsb;
7649 h->prev_frame_num_offset= h->frame_num_offset;
7650 h->prev_frame_num= h->frame_num;
7652 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7653 ff_vdpau_h264_picture_complete(s);
/*
7656 * FIXME: Error handling code does not seem to support interlaced
7657 * when slices span multiple rows
7658 * The ff_er_add_slice calls don't work right for bottom
7659 * fields; they cause massive erroneous error concealing
7660 * Error marking covers both fields (top and bottom).
7661 * This causes a mismatched s->error_count
7662 * and a bad error table. Further, the error count goes to
7663 * INT_MAX when called for bottom field, because mb_y is
7664 * past end by one (callers fault) and resync_mb_y != 0
7665 * causes problems for the first MB line, too.
 */
7667 if (!FIELD_PICTURE)
7668 ff_er_frame_end(s);
7670 MPV_frame_end(s);
/* A field POC still at INT_MAX means only one field of an interlaced
 * picture has been decoded so far. */
7672 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7673 /* Wait for second field. */
7674 *data_size = 0;
7676 } else {
7677 cur->repeat_pict = 0;
7679 /* Signal interlacing information externally. */
7680 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7681 if(h->sps.pic_struct_present_flag){
7682 switch (h->sei_pic_struct)
7684 case SEI_PIC_STRUCT_FRAME:
7685 cur->interlaced_frame = 0;
7686 break;
7687 case SEI_PIC_STRUCT_TOP_FIELD:
7688 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7689 case SEI_PIC_STRUCT_TOP_BOTTOM:
7690 case SEI_PIC_STRUCT_BOTTOM_TOP:
7691 cur->interlaced_frame = 1;
7692 break;
7693 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7694 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7695 // Signal the possibility of telecined film externally (pic_struct 5,6)
7696 // From these hints, let the applications decide if they apply deinterlacing.
7697 cur->repeat_pict = 1;
7698 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7699 break;
7700 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7701 // Force progressive here, as doubling interlaced frame is a bad idea.
7702 cur->interlaced_frame = 0;
7703 cur->repeat_pict = 2;
7704 break;
7705 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7706 cur->interlaced_frame = 0;
7707 cur->repeat_pict = 4;
7708 break;
7710 }else{
7711 /* Derive interlacing flag from used decoding process. */
7712 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7715 if (cur->field_poc[0] != cur->field_poc[1]){
7716 /* Derive top_field_first from field pocs. */
7717 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7718 }else{
7719 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7720 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7721 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7722 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7723 cur->top_field_first = 1;
7724 else
7725 cur->top_field_first = 0;
7726 }else{
7727 /* Most likely progressive */
7728 cur->top_field_first = 0;
7732 //FIXME do something with unavailable reference frames
7734 /* Sort B-frames into display order */
7736 if(h->sps.bitstream_restriction_flag
7737 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7738 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7739 s->low_delay = 0;
7742 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7743 && !h->sps.bitstream_restriction_flag){
7744 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7745 s->low_delay= 0;
/* Append the current picture to the delay (reorder) buffer, pinning it
 * with DELAYED_PIC_REF if it is not otherwise referenced. */
7748 pics = 0;
7749 while(h->delayed_pic[pics]) pics++;
7751 assert(pics <= MAX_DELAYED_PIC_COUNT);
7753 h->delayed_pic[pics++] = cur;
7754 if(cur->reference == 0)
7755 cur->reference = DELAYED_PIC_REF;
/* Find the delayed picture with the lowest POC, stopping at the first
 * key frame / zero-POC picture (an IDR boundary). */
7757 out = h->delayed_pic[0];
7758 out_idx = 0;
7759 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7760 if(h->delayed_pic[i]->poc < out->poc){
7761 out = h->delayed_pic[i];
7762 out_idx = i;
7764 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7766 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow the reorder delay when the stream shows it needs more buffering. */
7768 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7770 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7771 || (s->low_delay &&
7772 ((!cross_idr && out->poc > h->outputed_poc + 2)
7773 || cur->pict_type == FF_B_TYPE)))
7775 s->low_delay = 0;
7776 s->avctx->has_b_frames++;
/* Pop the chosen picture if it is due (in order) or the buffer is full. */
7779 if(out_of_order || pics > s->avctx->has_b_frames){
7780 out->reference &= ~DELAYED_PIC_REF;
7781 for(i=out_idx; h->delayed_pic[i]; i++)
7782 h->delayed_pic[i] = h->delayed_pic[i+1];
7784 if(!out_of_order && pics > s->avctx->has_b_frames){
7785 *data_size = sizeof(AVFrame);
7787 h->outputed_poc = out->poc;
7788 *pict= *(AVFrame*)out;
7789 }else{
7790 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7795 assert(pict->data[0] || !*data_size);
7796 ff_print_debug_info(s, pict);
7797 //printf("out %d\n", (int)pict->data[0]);
7798 #if 0 //?
7800 /* Return the Picture timestamp as the frame number */
7801 /* we subtract 1 because it is added on utils.c */
7802 avctx->frame_number = s->picture_number - 1;
7803 #endif
7804 return get_consumed_bytes(s, buf_index, buf_size);
7806 #if 0
/* Dead code (#if 0): fill h->mb_avail[] with availability flags for the
 * neighbours of the current macroblock, where a neighbour counts as
 * available only if it belongs to the same slice (slice_table entry
 * equals the current slice_num). Index meaning, from the offsets used:
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left; [4]/[5] are constants. */
7807 static inline void fill_mb_avail(H264Context *h){
7808 MpegEncContext * const s = &h->s;
7809 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7811 if(s->mb_y){
7812 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7813 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7814 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7815 }else{
/* First macroblock row: nothing above is available. */
7816 h->mb_avail[0]=
7817 h->mb_avail[1]=
7818 h->mb_avail[2]= 0;
7820 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7821 h->mb_avail[4]= 1; //FIXME move out
7822 h->mb_avail[5]= 0; //FIXME move out
7824 #endif
7826 #ifdef TEST
/* Standalone self-test, built only when TEST is defined: round-trips
 * unsigned and signed exp-Golomb codes through PutBitContext/GetBitContext
 * and times each primitive with START_TIMER/STOP_TIMER. The 4x4 (I)DCT,
 * quantizer and NAL escape/unescape sections below are disabled (#if 0). */
7827 #undef printf
7828 #undef random
7829 #define COUNT 8000
7830 #define SIZE (COUNT*40)
7831 int main(void){
7832 int i;
7833 uint8_t temp[SIZE];
7834 PutBitContext pb;
7835 GetBitContext gb;
7836 // int int_temp[10000];
7837 DSPContext dsp;
7838 AVCodecContext avctx;
7840 dsputil_init(&dsp, &avctx);
/* Write ue(v) codes 0..COUNT-1, then read them back and compare. */
7842 init_put_bits(&pb, temp, SIZE);
7843 printf("testing unsigned exp golomb\n");
7844 for(i=0; i<COUNT; i++){
7845 START_TIMER
7846 set_ue_golomb(&pb, i);
7847 STOP_TIMER("set_ue_golomb");
7849 flush_put_bits(&pb);
7851 init_get_bits(&gb, temp, 8*SIZE);
7852 for(i=0; i<COUNT; i++){
7853 int j, s;
7855 s= show_bits(&gb, 24);
7857 START_TIMER
7858 j= get_ue_golomb(&gb);
7859 if(j != i){
7860 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7861 // return -1;
7863 STOP_TIMER("get_ue_golomb");
/* Same round-trip for se(v) codes, centered around zero. */
7867 init_put_bits(&pb, temp, SIZE);
7868 printf("testing signed exp golomb\n");
7869 for(i=0; i<COUNT; i++){
7870 START_TIMER
7871 set_se_golomb(&pb, i - COUNT/2);
7872 STOP_TIMER("set_se_golomb");
7874 flush_put_bits(&pb);
7876 init_get_bits(&gb, temp, 8*SIZE);
7877 for(i=0; i<COUNT; i++){
7878 int j, s;
7880 s= show_bits(&gb, 24);
7882 START_TIMER
7883 j= get_se_golomb(&gb);
7884 if(j != i - COUNT/2){
7885 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7886 // return -1;
7888 STOP_TIMER("get_se_golomb");
/* Disabled: DCT/IDCT round-trip, quantizer and NAL layer tests. */
7891 #if 0
7892 printf("testing 4x4 (I)DCT\n");
7894 DCTELEM block[16];
7895 uint8_t src[16], ref[16];
7896 uint64_t error= 0, max_error=0;
7898 for(i=0; i<COUNT; i++){
7899 int j;
7900 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7901 for(j=0; j<16; j++){
7902 ref[j]= random()%255;
7903 src[j]= random()%255;
7906 h264_diff_dct_c(block, src, ref, 4);
7908 //normalize
7909 for(j=0; j<16; j++){
7910 // printf("%d ", block[j]);
7911 block[j]= block[j]*4;
7912 if(j&1) block[j]= (block[j]*4 + 2)/5;
7913 if(j&4) block[j]= (block[j]*4 + 2)/5;
7915 // printf("\n");
7917 s->dsp.h264_idct_add(ref, block, 4);
7918 /* for(j=0; j<16; j++){
7919 printf("%d ", ref[j]);
7921 printf("\n");*/
7923 for(j=0; j<16; j++){
7924 int diff= FFABS(src[j] - ref[j]);
7926 error+= diff*diff;
7927 max_error= FFMAX(max_error, diff);
7930 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7931 printf("testing quantizer\n");
7932 for(qp=0; qp<52; qp++){
7933 for(i=0; i<16; i++)
7934 src1_block[i]= src2_block[i]= random()%255;
7937 printf("Testing NAL layer\n");
7939 uint8_t bitstream[COUNT];
7940 uint8_t nal[COUNT*2];
7941 H264Context h;
7942 memset(&h, 0, sizeof(H264Context));
7944 for(i=0; i<COUNT; i++){
7945 int zeros= i;
7946 int nal_length;
7947 int consumed;
7948 int out_length;
7949 uint8_t *out;
7950 int j;
7952 for(j=0; j<COUNT; j++){
7953 bitstream[j]= (random() % 255) + 1;
/* Scatter exactly 'zeros' zero bytes over the random payload. */
7956 for(j=0; j<zeros; j++){
7957 int pos= random() % COUNT;
7958 while(bitstream[pos] == 0){
7959 pos++;
7960 pos %= COUNT;
7962 bitstream[pos]=0;
7965 START_TIMER
7967 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7968 if(nal_length<0){
7969 printf("encoding failed\n");
7970 return -1;
7973 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7975 STOP_TIMER("NAL")
7977 if(out_length != COUNT){
7978 printf("incorrect length %d %d\n", out_length, COUNT);
7979 return -1;
7982 if(consumed != nal_length){
7983 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7984 return -1;
7987 if(memcmp(bitstream, out, COUNT)){
7988 printf("mismatch\n");
7989 return -1;
7992 #endif
7994 printf("Testing RBSP\n");
7997 return 0;
7999 #endif /* TEST */
8002 static av_cold int decode_end(AVCodecContext *avctx)
8004 H264Context *h = avctx->priv_data;
8005 MpegEncContext *s = &h->s;
8006 int i;
8008 av_freep(&h->rbsp_buffer[0]);
8009 av_freep(&h->rbsp_buffer[1]);
8010 free_tables(h); //FIXME cleanup init stuff perhaps
8012 for(i = 0; i < MAX_SPS_COUNT; i++)
8013 av_freep(h->sps_buffers + i);
8015 for(i = 0; i < MAX_PPS_COUNT; i++)
8016 av_freep(h->pps_buffers + i);
8018 MPV_common_end(s);
8020 // memset(h, 0, sizeof(H264Context));
8022 return 0;
/* Registration entry for the software H.264 decoder.
 * NOTE(review): the leading values are positional AVCodec initializers —
 * presumably name, type, id, priv_data_size, init, encode (NULL: decoder
 * only), close, decode, capabilities; confirm against the AVCodec
 * declaration in this tree's avcodec.h. */
8026 AVCodec h264_decoder = {
8027 "h264",
8028 CODEC_TYPE_VIDEO,
8029 CODEC_ID_H264,
8030 sizeof(H264Context),
8031 decode_init,
8032 NULL,
8033 decode_end,
8034 decode_frame,
8035 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8036 .flush= flush_dpb,
8037 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8040 #if CONFIG_H264_VDPAU_DECODER
/* Registration entry for the VDPAU-accelerated H.264 decoder; identical to
 * h264_decoder above except for the name, long_name and the added
 * CODEC_CAP_HWACCEL_VDPAU capability flag. */
8041 AVCodec h264_vdpau_decoder = {
8042 "h264_vdpau",
8043 CODEC_TYPE_VIDEO,
8044 CODEC_ID_H264,
8045 sizeof(H264Context),
8046 decode_init,
8047 NULL,
8048 decode_end,
8049 decode_frame,
8050 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8051 .flush= flush_dpb,
8052 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8054 #endif
8056 #if CONFIG_SVQ3_DECODER
8057 #include "svq3.c"
8058 #endif