2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc
[4];
52 static VLC chroma_dc_coeff_token_vlc
;
54 static VLC total_zeros_vlc
[15];
55 static VLC chroma_dc_total_zeros_vlc
[3];
57 static VLC run_vlc
[6];
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
61 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
62 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
63 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
65 static av_always_inline
uint32_t pack16to32(int a
, int b
){
66 #ifdef WORDS_BIGENDIAN
67 return (b
&0xFFFF) + (a
<<16);
69 return (a
&0xFFFF) + (b
<<16);
/* Lookup tables mapping a quantization parameter qp (0..51) to qp%6 and
 * qp/6 respectively, avoiding a runtime division in the dequant path. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
82 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
83 MpegEncContext
* const s
= &h
->s
;
84 const int mb_xy
= h
->mb_xy
;
85 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
86 int topleft_type
, top_type
, topright_type
, left_type
[2];
88 int topleft_partition
= -1;
91 top_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock
&& (h
->slice_num
== 1 || h
->slice_table
[mb_xy
] == h
->slice_table
[top_xy
]) && !FRAME_MBAFF
)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
100 topleft_xy
= top_xy
- 1;
101 topright_xy
= top_xy
+ 1;
102 left_xy
[1] = left_xy
[0] = mb_xy
-1;
112 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
113 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
114 const int topleft_pair_xy
= top_pair_xy
- 1;
115 const int topright_pair_xy
= top_pair_xy
+ 1;
116 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
117 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
118 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
119 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
120 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
121 const int bottom
= (s
->mb_y
& 1);
122 tprintf(s
->avctx
, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
124 ? !curr_mb_frame_flag
// bottom macroblock
125 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
127 top_xy
-= s
->mb_stride
;
130 ? !curr_mb_frame_flag
// bottom macroblock
131 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
133 topleft_xy
-= s
->mb_stride
;
134 } else if(bottom
&& curr_mb_frame_flag
&& !left_mb_frame_flag
) {
135 topleft_xy
+= s
->mb_stride
;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition
= 0;
140 ? !curr_mb_frame_flag
// bottom macroblock
141 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
143 topright_xy
-= s
->mb_stride
;
145 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
146 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
147 if (curr_mb_frame_flag
) {
168 left_xy
[1] += s
->mb_stride
;
181 h
->top_mb_xy
= top_xy
;
182 h
->left_mb_xy
[0] = left_xy
[0];
183 h
->left_mb_xy
[1] = left_xy
[1];
187 top_type
= h
->slice_table
[top_xy
] < 255 ? s
->current_picture
.mb_type
[top_xy
] : 0;
188 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
189 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
191 if(FRAME_MBAFF
&& !IS_INTRA(mb_type
)){
193 int v
= *(uint16_t*)&h
->non_zero_count
[mb_xy
][14];
195 h
->non_zero_count_cache
[scan8
[i
]] = (v
>>i
)&1;
196 for(list
=0; list
<h
->list_count
; list
++){
197 if(USES_LIST(mb_type
,list
)){
198 uint32_t *src
= (uint32_t*)s
->current_picture
.motion_val
[list
][h
->mb2b_xy
[mb_xy
]];
199 uint32_t *dst
= (uint32_t*)h
->mv_cache
[list
][scan8
[0]];
200 int8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
201 for(i
=0; i
<4; i
++, dst
+=8, src
+=h
->b_stride
){
207 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
208 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = pack16to32(ref
[0],ref
[1])*0x0101;
210 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
211 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = pack16to32(ref
[0],ref
[1])*0x0101;
213 fill_rectangle(&h
-> mv_cache
[list
][scan8
[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h
->ref_cache
[list
][scan8
[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1);
219 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
220 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
221 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
222 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
223 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
226 if(IS_INTRA(mb_type
)){
227 h
->topleft_samples_available
=
228 h
->top_samples_available
=
229 h
->left_samples_available
= 0xFFFF;
230 h
->topright_samples_available
= 0xEEEA;
232 if(!IS_INTRA(top_type
) && (top_type
==0 || h
->pps
.constrained_intra_pred
)){
233 h
->topleft_samples_available
= 0xB3FF;
234 h
->top_samples_available
= 0x33FF;
235 h
->topright_samples_available
= 0x26EA;
238 if(!IS_INTRA(left_type
[i
]) && (left_type
[i
]==0 || h
->pps
.constrained_intra_pred
)){
239 h
->topleft_samples_available
&= 0xDF5F;
240 h
->left_samples_available
&= 0x5F5F;
244 if(!IS_INTRA(topleft_type
) && (topleft_type
==0 || h
->pps
.constrained_intra_pred
))
245 h
->topleft_samples_available
&= 0x7FFF;
247 if(!IS_INTRA(topright_type
) && (topright_type
==0 || h
->pps
.constrained_intra_pred
))
248 h
->topright_samples_available
&= 0xFBFF;
250 if(IS_INTRA4x4(mb_type
)){
251 if(IS_INTRA4x4(top_type
)){
252 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
253 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
254 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
255 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
258 if(!top_type
|| (IS_INTER(top_type
) && h
->pps
.constrained_intra_pred
))
263 h
->intra4x4_pred_mode_cache
[4+8*0]=
264 h
->intra4x4_pred_mode_cache
[5+8*0]=
265 h
->intra4x4_pred_mode_cache
[6+8*0]=
266 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
269 if(IS_INTRA4x4(left_type
[i
])){
270 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
271 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
274 if(!left_type
[i
] || (IS_INTER(left_type
[i
]) && h
->pps
.constrained_intra_pred
))
279 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
280 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
297 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
298 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
299 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
300 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
302 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
303 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
305 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
306 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
309 h
->non_zero_count_cache
[4+8*0]=
310 h
->non_zero_count_cache
[5+8*0]=
311 h
->non_zero_count_cache
[6+8*0]=
312 h
->non_zero_count_cache
[7+8*0]=
314 h
->non_zero_count_cache
[1+8*0]=
315 h
->non_zero_count_cache
[2+8*0]=
317 h
->non_zero_count_cache
[1+8*3]=
318 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
322 for (i
=0; i
<2; i
++) {
324 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
325 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
326 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
327 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
329 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
330 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
331 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
332 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
339 h
->top_cbp
= h
->cbp_table
[top_xy
];
340 } else if(IS_INTRA(mb_type
)) {
347 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type
)) {
354 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
357 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
362 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
364 for(list
=0; list
<h
->list_count
; list
++){
365 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h
->mv_cache_clean
[list
]= 0;
375 if(USES_LIST(top_type
, list
)){
376 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
377 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
378 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
379 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
380 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
381 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
382 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
383 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
384 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
385 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
387 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
388 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
389 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
390 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
395 int cache_idx
= scan8
[0] - 1 + i
*2*8;
396 if(USES_LIST(left_type
[i
], list
)){
397 const int b_xy
= h
->mb2b_xy
[left_xy
[i
]] + 3;
398 const int b8_xy
= h
->mb2b8_xy
[left_xy
[i
]] + 1;
399 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0+i
*2]];
400 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1+i
*2]];
401 h
->ref_cache
[list
][cache_idx
]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0+i
*2]>>1)];
402 h
->ref_cache
[list
][cache_idx
+8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1+i
*2]>>1)];
404 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]=
405 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= 0;
406 h
->ref_cache
[list
][cache_idx
]=
407 h
->ref_cache
[list
][cache_idx
+8]= left_type
[i
] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
411 if((for_deblock
|| (IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
)) && !FRAME_MBAFF
)
414 if(USES_LIST(topleft_type
, list
)){
415 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + h
->b_stride
+ (topleft_partition
& 2*h
->b_stride
);
416 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + (topleft_partition
& h
->b8_stride
);
417 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
418 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
420 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
421 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
424 if(USES_LIST(topright_type
, list
)){
425 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
426 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
427 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
428 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
430 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
431 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
434 if((IS_SKIP(mb_type
) || IS_DIRECT(mb_type
)) && !FRAME_MBAFF
)
437 h
->ref_cache
[list
][scan8
[5 ]+1] =
438 h
->ref_cache
[list
][scan8
[7 ]+1] =
439 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h
->ref_cache
[list
][scan8
[4 ]] =
441 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
442 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
443 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
444 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
446 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type
, list
)){
451 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
452 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
453 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
454 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
455 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
457 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
458 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
459 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
460 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type
[0], list
)){
463 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
464 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
465 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
467 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
468 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type
[1], list
)){
471 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
472 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
473 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
475 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
476 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
479 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
480 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
482 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
484 if(h
->slice_type
== FF_B_TYPE
){
485 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type
)){
488 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type
)){
490 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
491 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
492 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
494 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type
[0]))
498 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type
[0]))
500 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
502 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type
[1]))
505 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type
[1]))
507 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
509 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
549 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
552 static inline void write_back_intra_pred_mode(H264Context
*h
){
553 const int mb_xy
= h
->mb_xy
;
555 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
556 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
557 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
558 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
559 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
560 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
561 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
567 static inline int check_intra4x4_pred_mode(H264Context
*h
){
568 MpegEncContext
* const s
= &h
->s
;
569 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
570 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
573 if(!(h
->top_samples_available
&0x8000)){
575 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
577 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
580 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
585 if(!(h
->left_samples_available
&0x8000)){
587 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
589 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
592 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
604 MpegEncContext
* const s
= &h
->s
;
605 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
606 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
609 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
613 if(!(h
->top_samples_available
&0x8000)){
616 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
621 if(!(h
->left_samples_available
&0x8000)){
624 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context
*h
, int n
){
636 const int index8
= scan8
[n
];
637 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
638 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
639 const int min
= FFMIN(left
, top
);
641 tprintf(h
->s
.avctx
, "mode:%d %d min:%d\n", left
,top
, min
);
643 if(min
<0) return DC_PRED
;
647 static inline void write_back_non_zero_count(H264Context
*h
){
648 const int mb_xy
= h
->mb_xy
;
650 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
651 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
652 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
653 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
654 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
655 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
656 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
658 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
659 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
660 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
662 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
663 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
664 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
667 // store all luma nnzs, for deblocking
670 v
+= (!!h
->non_zero_count_cache
[scan8
[i
]]) << i
;
671 *(uint16_t*)&h
->non_zero_count
[mb_xy
][14] = v
;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context
*h
, int n
){
680 const int index8
= scan8
[n
];
681 const int left
= h
->non_zero_count_cache
[index8
- 1];
682 const int top
= h
->non_zero_count_cache
[index8
- 8];
685 if(i
<64) i
= (i
+1)>>1;
687 tprintf(h
->s
.avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
692 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
693 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
694 MpegEncContext
*s
= &h
->s
;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types
= s
->current_picture_ptr
->mb_type
;
701 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
702 *C
= h
->mv_cache
[list
][scan8
[0]-2];
705 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
706 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
707 if(IS_INTERLACED(mb_types
[topright_xy
])){
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
721 if(topright_ref
== PART_NOT_AVAILABLE
722 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
723 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
725 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
726 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
729 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
738 if(topright_ref
!= PART_NOT_AVAILABLE
){
739 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
742 tprintf(s
->avctx
, "topright MV not available\n");
744 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
745 return h
->ref_cache
[list
][ i
- 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
756 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
757 const int index8
= scan8
[n
];
758 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
759 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
760 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
761 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
763 int diagonal_ref
, match_count
;
765 assert(part_width
==1 || part_width
==2 || part_width
==4);
775 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
776 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
777 tprintf(h
->s
.avctx
, "pred_motion match_count=%d\n", match_count
);
778 if(match_count
> 1){ //most common
779 *mx
= mid_pred(A
[0], B
[0], C
[0]);
780 *my
= mid_pred(A
[1], B
[1], C
[1]);
781 }else if(match_count
==1){
785 }else if(top_ref
==ref
){
793 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
797 *mx
= mid_pred(A
[0], B
[0], C
[0]);
798 *my
= mid_pred(A
[1], B
[1], C
[1]);
802 tprintf(h
->s
.avctx
, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
811 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
813 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
814 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
816 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
824 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
825 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
827 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
837 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
846 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
848 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
849 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
851 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
862 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
864 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
866 if(diagonal_ref
== ref
){
874 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
877 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
878 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
879 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
881 tprintf(h
->s
.avctx
, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
883 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
884 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
885 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
891 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
896 static inline void direct_dist_scale_factor(H264Context
* const h
){
897 const int poc
= h
->s
.current_picture_ptr
->poc
;
898 const int poc1
= h
->ref_list
[1][0].poc
;
900 for(i
=0; i
<h
->ref_count
[0]; i
++){
901 int poc0
= h
->ref_list
[0][i
].poc
;
902 int td
= av_clip(poc1
- poc0
, -128, 127);
903 if(td
== 0 /* FIXME || pic0 is a long-term ref */){
904 h
->dist_scale_factor
[i
] = 256;
906 int tb
= av_clip(poc
- poc0
, -128, 127);
907 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
908 h
->dist_scale_factor
[i
] = av_clip((tb
*tx
+ 32) >> 6, -1024, 1023);
912 for(i
=0; i
<h
->ref_count
[0]; i
++){
913 h
->dist_scale_factor_field
[2*i
] =
914 h
->dist_scale_factor_field
[2*i
+1] = h
->dist_scale_factor
[i
];
918 static inline void direct_ref_list_init(H264Context
* const h
){
919 MpegEncContext
* const s
= &h
->s
;
920 Picture
* const ref1
= &h
->ref_list
[1][0];
921 Picture
* const cur
= s
->current_picture_ptr
;
923 if(cur
->pict_type
== FF_I_TYPE
)
924 cur
->ref_count
[0] = 0;
925 if(cur
->pict_type
!= FF_B_TYPE
)
926 cur
->ref_count
[1] = 0;
927 for(list
=0; list
<2; list
++){
928 cur
->ref_count
[list
] = h
->ref_count
[list
];
929 for(j
=0; j
<h
->ref_count
[list
]; j
++)
930 cur
->ref_poc
[list
][j
] = h
->ref_list
[list
][j
].poc
;
932 if(cur
->pict_type
!= FF_B_TYPE
|| h
->direct_spatial_mv_pred
)
934 for(list
=0; list
<2; list
++){
935 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
936 const int poc
= ref1
->ref_poc
[list
][i
];
937 h
->map_col_to_list0
[list
][i
] = 0; /* bogus; fills in for missing frames */
938 for(j
=0; j
<h
->ref_count
[list
]; j
++)
939 if(h
->ref_list
[list
][j
].poc
== poc
){
940 h
->map_col_to_list0
[list
][i
] = j
;
946 for(list
=0; list
<2; list
++){
947 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
948 j
= h
->map_col_to_list0
[list
][i
];
949 h
->map_col_to_list0_field
[list
][2*i
] = 2*j
;
950 h
->map_col_to_list0_field
[list
][2*i
+1] = 2*j
+1;
956 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
957 MpegEncContext
* const s
= &h
->s
;
958 const int mb_xy
= h
->mb_xy
;
959 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
960 const int b4_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
961 const int mb_type_col
= h
->ref_list
[1][0].mb_type
[mb_xy
];
962 const int16_t (*l1mv0
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[0][b4_xy
];
963 const int16_t (*l1mv1
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[1][b4_xy
];
964 const int8_t *l1ref0
= &h
->ref_list
[1][0].ref_index
[0][b8_xy
];
965 const int8_t *l1ref1
= &h
->ref_list
[1][0].ref_index
[1][b8_xy
];
966 const int is_b8x8
= IS_8X8(*mb_type
);
967 unsigned int sub_mb_type
;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
971 if(IS_8X8(mb_type_col
) && !h
->sps
.direct_8x8_inference_flag
){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
975 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
976 }else if(!is_b8x8
&& (mb_type_col
& MB_TYPE_16x16_OR_INTRA
)){
977 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
978 *mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
980 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
981 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
984 *mb_type
|= MB_TYPE_DIRECT2
;
986 *mb_type
|= MB_TYPE_INTERLACED
;
988 tprintf(s
->avctx
, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type
, sub_mb_type
, is_b8x8
, mb_type_col
);
990 if(h
->direct_spatial_mv_pred
){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list
=0; list
<2; list
++){
999 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1000 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1001 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1003 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1005 if(ref
[list
] < 0 || (refb
< ref
[list
] && refb
>= 0))
1007 if(ref
[list
] < 0 || (refc
< ref
[list
] && refc
>= 0))
1013 if(ref
[0] < 0 && ref
[1] < 0){
1014 ref
[0] = ref
[1] = 0;
1015 mv
[0][0] = mv
[0][1] =
1016 mv
[1][0] = mv
[1][1] = 0;
1018 for(list
=0; list
<2; list
++){
1020 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1022 mv
[list
][0] = mv
[list
][1] = 0;
1028 *mb_type
&= ~MB_TYPE_L1
;
1029 sub_mb_type
&= ~MB_TYPE_L1
;
1030 }else if(ref
[0] < 0){
1032 *mb_type
&= ~MB_TYPE_L0
;
1033 sub_mb_type
&= ~MB_TYPE_L0
;
1036 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
)){
1037 int pair_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1038 int mb_types_col
[2];
1039 int b8_stride
= h
->b8_stride
;
1040 int b4_stride
= h
->b_stride
;
1042 *mb_type
= (*mb_type
& ~MB_TYPE_16x16
) | MB_TYPE_8x8
;
1044 if(IS_INTERLACED(*mb_type
)){
1045 mb_types_col
[0] = h
->ref_list
[1][0].mb_type
[pair_xy
];
1046 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1048 l1ref0
-= 2*b8_stride
;
1049 l1ref1
-= 2*b8_stride
;
1050 l1mv0
-= 4*b4_stride
;
1051 l1mv1
-= 4*b4_stride
;
1056 int cur_poc
= s
->current_picture_ptr
->poc
;
1057 int *col_poc
= h
->ref_list
[1]->field_poc
;
1058 int col_parity
= FFABS(col_poc
[0] - cur_poc
) >= FFABS(col_poc
[1] - cur_poc
);
1059 int dy
= 2*col_parity
- (s
->mb_y
&1);
1061 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+ col_parity
*s
->mb_stride
];
1062 l1ref0
+= dy
*b8_stride
;
1063 l1ref1
+= dy
*b8_stride
;
1064 l1mv0
+= 2*dy
*b4_stride
;
1065 l1mv1
+= 2*dy
*b4_stride
;
1069 for(i8
=0; i8
<4; i8
++){
1072 int xy8
= x8
+y8
*b8_stride
;
1073 int xy4
= 3*x8
+y8
*b4_stride
;
1076 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1078 h
->sub_mb_type
[i8
] = sub_mb_type
;
1080 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1081 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1082 if(!IS_INTRA(mb_types_col
[y8
])
1083 && ( (l1ref0
[xy8
] == 0 && FFABS(l1mv0
[xy4
][0]) <= 1 && FFABS(l1mv0
[xy4
][1]) <= 1)
1084 || (l1ref0
[xy8
] < 0 && l1ref1
[xy8
] == 0 && FFABS(l1mv1
[xy4
][0]) <= 1 && FFABS(l1mv1
[xy4
][1]) <= 1))){
1086 a
= pack16to32(mv
[0][0],mv
[0][1]);
1088 b
= pack16to32(mv
[1][0],mv
[1][1]);
1090 a
= pack16to32(mv
[0][0],mv
[0][1]);
1091 b
= pack16to32(mv
[1][0],mv
[1][1]);
1093 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, a
, 4);
1094 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, b
, 4);
1096 }else if(IS_16X16(*mb_type
)){
1099 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1100 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1101 if(!IS_INTRA(mb_type_col
)
1102 && ( (l1ref0
[0] == 0 && FFABS(l1mv0
[0][0]) <= 1 && FFABS(l1mv0
[0][1]) <= 1)
1103 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && FFABS(l1mv1
[0][0]) <= 1 && FFABS(l1mv1
[0][1]) <= 1
1104 && (h
->x264_build
>33 || !h
->x264_build
)))){
1106 a
= pack16to32(mv
[0][0],mv
[0][1]);
1108 b
= pack16to32(mv
[1][0],mv
[1][1]);
1110 a
= pack16to32(mv
[0][0],mv
[0][1]);
1111 b
= pack16to32(mv
[1][0],mv
[1][1]);
1113 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, a
, 4);
1114 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, b
, 4);
1116 for(i8
=0; i8
<4; i8
++){
1117 const int x8
= i8
&1;
1118 const int y8
= i8
>>1;
1120 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1122 h
->sub_mb_type
[i8
] = sub_mb_type
;
1124 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1125 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1126 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1127 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1130 if(!IS_INTRA(mb_type_col
) && ( l1ref0
[x8
+ y8
*h
->b8_stride
] == 0
1131 || (l1ref0
[x8
+ y8
*h
->b8_stride
] < 0 && l1ref1
[x8
+ y8
*h
->b8_stride
] == 0
1132 && (h
->x264_build
>33 || !h
->x264_build
)))){
1133 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*h
->b8_stride
] == 0 ? l1mv0
: l1mv1
;
1134 if(IS_SUB_8X8(sub_mb_type
)){
1135 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1136 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1138 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1140 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1143 for(i4
=0; i4
<4; i4
++){
1144 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1145 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1147 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1149 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1155 }else{ /* direct temporal mv pred */
1156 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1157 const int *dist_scale_factor
= h
->dist_scale_factor
;
1160 if(IS_INTERLACED(*mb_type
)){
1161 map_col_to_list0
[0] = h
->map_col_to_list0_field
[0];
1162 map_col_to_list0
[1] = h
->map_col_to_list0_field
[1];
1163 dist_scale_factor
= h
->dist_scale_factor_field
;
1165 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
)){
1166 /* FIXME assumes direct_8x8_inference == 1 */
1167 const int pair_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1168 int mb_types_col
[2];
1171 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
1172 | (is_b8x8
? 0 : MB_TYPE_DIRECT2
)
1173 | (*mb_type
& MB_TYPE_INTERLACED
);
1174 sub_mb_type
= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_16x16
;
1176 if(IS_INTERLACED(*mb_type
)){
1177 /* frame to field scaling */
1178 mb_types_col
[0] = h
->ref_list
[1][0].mb_type
[pair_xy
];
1179 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1181 l1ref0
-= 2*h
->b8_stride
;
1182 l1ref1
-= 2*h
->b8_stride
;
1183 l1mv0
-= 4*h
->b_stride
;
1184 l1mv1
-= 4*h
->b_stride
;
1188 if( (mb_types_col
[0] & MB_TYPE_16x16_OR_INTRA
)
1189 && (mb_types_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1191 *mb_type
|= MB_TYPE_16x8
;
1193 *mb_type
|= MB_TYPE_8x8
;
1195 /* field to frame scaling */
1196 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1197 * but in MBAFF, top and bottom POC are equal */
1198 int dy
= (s
->mb_y
&1) ? 1 : 2;
1200 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1201 l1ref0
+= dy
*h
->b8_stride
;
1202 l1ref1
+= dy
*h
->b8_stride
;
1203 l1mv0
+= 2*dy
*h
->b_stride
;
1204 l1mv1
+= 2*dy
*h
->b_stride
;
1207 if((mb_types_col
[0] & (MB_TYPE_16x16_OR_INTRA
|MB_TYPE_16x8
))
1209 *mb_type
|= MB_TYPE_16x16
;
1211 *mb_type
|= MB_TYPE_8x8
;
1214 for(i8
=0; i8
<4; i8
++){
1215 const int x8
= i8
&1;
1216 const int y8
= i8
>>1;
1218 const int16_t (*l1mv
)[2]= l1mv0
;
1220 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1222 h
->sub_mb_type
[i8
] = sub_mb_type
;
1224 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1225 if(IS_INTRA(mb_types_col
[y8
])){
1226 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1227 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1228 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1232 ref0
= l1ref0
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
];
1234 ref0
= map_col_to_list0
[0][ref0
*2>>y_shift
];
1236 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
]*2>>y_shift
];
1239 scale
= dist_scale_factor
[ref0
];
1240 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1243 const int16_t *mv_col
= l1mv
[x8
*3 + (y8
*6>>y_shift
)*h
->b_stride
];
1244 int my_col
= (mv_col
[1]<<y_shift
)/2;
1245 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1246 int my
= (scale
* my_col
+ 128) >> 8;
1247 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1248 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1255 /* one-to-one mv scaling */
1257 if(IS_16X16(*mb_type
)){
1260 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1261 if(IS_INTRA(mb_type_col
)){
1264 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0]]
1265 : map_col_to_list0
[1][l1ref1
[0]];
1266 const int scale
= dist_scale_factor
[ref0
];
1267 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1269 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1270 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1272 mv0
= pack16to32(mv_l0
[0],mv_l0
[1]);
1273 mv1
= pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1275 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
1276 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, mv0
, 4);
1277 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, mv1
, 4);
1279 for(i8
=0; i8
<4; i8
++){
1280 const int x8
= i8
&1;
1281 const int y8
= i8
>>1;
1283 const int16_t (*l1mv
)[2]= l1mv0
;
1285 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1287 h
->sub_mb_type
[i8
] = sub_mb_type
;
1288 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1289 if(IS_INTRA(mb_type_col
)){
1290 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1291 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1292 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1296 ref0
= l1ref0
[x8
+ y8
*h
->b8_stride
];
1298 ref0
= map_col_to_list0
[0][ref0
];
1300 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*h
->b8_stride
]];
1303 scale
= dist_scale_factor
[ref0
];
1305 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1306 if(IS_SUB_8X8(sub_mb_type
)){
1307 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1308 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1309 int my
= (scale
* mv_col
[1] + 128) >> 8;
1310 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1311 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1313 for(i4
=0; i4
<4; i4
++){
1314 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1315 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1316 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1317 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1318 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1319 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1326 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1327 MpegEncContext
* const s
= &h
->s
;
1328 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1329 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1332 if(!USES_LIST(mb_type
, 0))
1333 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1335 for(list
=0; list
<h
->list_count
; list
++){
1337 if(!USES_LIST(mb_type
, list
))
1341 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1342 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1344 if( h
->pps
.cabac
) {
1345 if(IS_SKIP(mb_type
))
1346 fill_rectangle(h
->mvd_table
[list
][b_xy
], 4, 4, h
->b_stride
, 0, 4);
1349 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1350 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1355 int8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1356 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1357 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1358 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1359 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1363 if(h
->slice_type
== FF_B_TYPE
&& h
->pps
.cabac
){
1364 if(IS_8X8(mb_type
)){
1365 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1366 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1367 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1368 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1374 * Decodes a network abstraction layer unit.
1375 * @param consumed is the number of bytes used as input
1376 * @param length is the length of the array
1377 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1378 * @returns decoded bytes, might be src+1 if no escapes
1380 static const uint8_t *decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1385 // src[0]&0x80; //forbidden bit
1386 h
->nal_ref_idc
= src
[0]>>5;
1387 h
->nal_unit_type
= src
[0]&0x1F;
1391 for(i
=0; i
<length
; i
++)
1392 printf("%2X ", src
[i
]);
1394 for(i
=0; i
+1<length
; i
+=2){
1395 if(src
[i
]) continue;
1396 if(i
>0 && src
[i
-1]==0) i
--;
1397 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1399 /* startcode, so we must be past the end */
1406 if(i
>=length
-1){ //no escaped 0
1407 *dst_length
= length
;
1408 *consumed
= length
+1; //+1 for the header
1412 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0; // use second escape buffer for inter data
1413 h
->rbsp_buffer
[bufidx
]= av_fast_realloc(h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
);
1414 dst
= h
->rbsp_buffer
[bufidx
];
1420 //printf("decoding esc\n");
1423 //remove escapes (very rare 1:2^22)
1424 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1425 if(src
[si
+2]==3){ //escape
1430 }else //next start code
1434 dst
[di
++]= src
[si
++];
1438 *consumed
= si
+ 1;//+1 for the header
1439 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1444 * identifies the exact end of the bitstream
1445 * @return the length of the trailing, or 0 if damaged
/*
 * Identifies the exact end of the bitstream by locating the RBSP trailing
 * stop bit in the last byte.
 * NOTE(review): the function body (declaration of v and the bit-scan loop,
 * original lines 1448-1458) is missing from this extract; only the
 * signature and the trace call are visible here — confirm against the
 * full file.
 */
1447 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
1451 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
1461 * IDCT transforms the 16 dc values and dequantizes them.
1462 * @param qp quantization parameter
1464 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1467 int temp
[16]; //FIXME check if this is a good idea
1468 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1469 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1471 //memset(block, 64, 2*256);
1474 const int offset
= y_offset
[i
];
1475 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1476 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1477 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1478 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1487 const int offset
= x_offset
[i
];
1488 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1489 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1490 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1491 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1493 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1494 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1495 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1496 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1502 * DCT transforms the 16 dc values.
1503 * @param qp quantization parameter ??? FIXME
1505 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1506 // const int qmul= dequant_coeff[qp][0];
1508 int temp
[16]; //FIXME check if this is a good idea
1509 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1510 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1513 const int offset
= y_offset
[i
];
1514 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1515 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1516 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1517 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1526 const int offset
= x_offset
[i
];
1527 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1528 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1529 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1530 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1532 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1533 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1534 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1535 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1543 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1544 const int stride
= 16*2;
1545 const int xStride
= 16;
1548 a
= block
[stride
*0 + xStride
*0];
1549 b
= block
[stride
*0 + xStride
*1];
1550 c
= block
[stride
*1 + xStride
*0];
1551 d
= block
[stride
*1 + xStride
*1];
1558 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1559 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1560 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1561 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
1565 static void chroma_dc_dct_c(DCTELEM
*block
){
1566 const int stride
= 16*2;
1567 const int xStride
= 16;
1570 a
= block
[stride
*0 + xStride
*0];
1571 b
= block
[stride
*0 + xStride
*1];
1572 c
= block
[stride
*1 + xStride
*0];
1573 d
= block
[stride
*1 + xStride
*1];
1580 block
[stride
*0 + xStride
*0]= (a
+c
);
1581 block
[stride
*0 + xStride
*1]= (e
+b
);
1582 block
[stride
*1 + xStride
*0]= (a
-c
);
1583 block
[stride
*1 + xStride
*1]= (e
-b
);
1588 * gets the chroma qp.
1590 static inline int get_chroma_qp(H264Context
*h
, int t
, int qscale
){
1591 return h
->pps
.chroma_qp_table
[t
][qscale
& 0xff];
1594 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1595 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1596 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int separate_dc
){
1598 const int * const quant_table
= quant_coeff
[qscale
];
1599 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
1600 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
1601 const unsigned int threshold2
= (threshold1
<<1);
1607 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
1608 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
1609 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1611 int level
= block
[0]*quant_coeff
[qscale
+18][0];
1612 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1614 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
1617 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
1620 // last_non_zero = i;
1625 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
1626 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
1627 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1629 int level
= block
[0]*quant_table
[0];
1630 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1632 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
1635 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
1638 // last_non_zero = i;
1651 const int j
= scantable
[i
];
1652 int level
= block
[j
]*quant_table
[j
];
1654 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1655 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1656 if(((unsigned)(level
+threshold1
))>threshold2
){
1658 level
= (bias
+ level
)>>QUANT_SHIFT
;
1661 level
= (bias
- level
)>>QUANT_SHIFT
;
1670 return last_non_zero
;
1673 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
1674 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1675 int src_x_offset
, int src_y_offset
,
1676 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
1677 MpegEncContext
* const s
= &h
->s
;
1678 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
1679 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
1680 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
1681 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
1682 uint8_t * src_cb
, * src_cr
;
1683 int extra_width
= h
->emu_edge_width
;
1684 int extra_height
= h
->emu_edge_height
;
1686 const int full_mx
= mx
>>2;
1687 const int full_my
= my
>>2;
1688 const int pic_width
= 16*s
->mb_width
;
1689 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
1691 if(!pic
->data
[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
1694 if(mx
&7) extra_width
-= 3;
1695 if(my
&7) extra_height
-= 3;
1697 if( full_mx
< 0-extra_width
1698 || full_my
< 0-extra_height
1699 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
1700 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
1701 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
1702 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
1706 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
1708 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
1711 if(ENABLE_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
1714 // chroma offset when predicting from a field of opposite parity
1715 my
+= 2 * ((s
->mb_y
& 1) - (pic
->reference
- 1));
1716 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
1718 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1719 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1722 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1723 src_cb
= s
->edge_emu_buffer
;
1725 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1728 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1729 src_cr
= s
->edge_emu_buffer
;
1731 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1734 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1735 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1736 int x_offset
, int y_offset
,
1737 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1738 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1739 int list0
, int list1
){
1740 MpegEncContext
* const s
= &h
->s
;
1741 qpel_mc_func
*qpix_op
= qpix_put
;
1742 h264_chroma_mc_func chroma_op
= chroma_put
;
1744 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1745 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1746 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1747 x_offset
+= 8*s
->mb_x
;
1748 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1751 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
1752 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
1753 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1754 qpix_op
, chroma_op
);
1757 chroma_op
= chroma_avg
;
1761 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
1762 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
1763 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1764 qpix_op
, chroma_op
);
1768 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1769 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1770 int x_offset
, int y_offset
,
1771 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1772 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
1773 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
1774 int list0
, int list1
){
1775 MpegEncContext
* const s
= &h
->s
;
1777 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1778 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1779 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1780 x_offset
+= 8*s
->mb_x
;
1781 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1784 /* don't optimize for luma-only case, since B-frames usually
1785 * use implicit weights => chroma too. */
1786 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
1787 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
1788 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
1789 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
1790 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
1792 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
1793 dest_y
, dest_cb
, dest_cr
,
1794 x_offset
, y_offset
, qpix_put
, chroma_put
);
1795 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
1796 tmp_y
, tmp_cb
, tmp_cr
,
1797 x_offset
, y_offset
, qpix_put
, chroma_put
);
1799 if(h
->use_weight
== 2){
1800 int weight0
= h
->implicit_weight
[refn0
][refn1
];
1801 int weight1
= 64 - weight0
;
1802 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
1803 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1804 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1806 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1807 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
1808 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
1809 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1810 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
1811 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
1812 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1813 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
1814 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
1817 int list
= list1
? 1 : 0;
1818 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
1819 Picture
*ref
= &h
->ref_list
[list
][refn
];
1820 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
1821 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1822 qpix_put
, chroma_put
);
1824 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1825 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
1826 if(h
->use_weight_chroma
){
1827 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1828 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
1829 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1830 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
1835 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1836 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1837 int x_offset
, int y_offset
,
1838 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1839 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1840 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
1841 int list0
, int list1
){
1842 if((h
->use_weight
==2 && list0
&& list1
1843 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
1844 || h
->use_weight
==1)
1845 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1846 x_offset
, y_offset
, qpix_put
, chroma_put
,
1847 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
1849 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1850 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
1853 static inline void prefetch_motion(H264Context
*h
, int list
){
1854 /* fetch pixels for estimated mv 4 macroblocks ahead
1855 * optimized for 64byte cache lines */
1856 MpegEncContext
* const s
= &h
->s
;
1857 const int refn
= h
->ref_cache
[list
][scan8
[0]];
1859 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
1860 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
1861 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
1862 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
1863 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
1864 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
1865 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1869 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1870 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
1871 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
1872 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
1873 MpegEncContext
* const s
= &h
->s
;
1874 const int mb_xy
= h
->mb_xy
;
1875 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
1877 assert(IS_INTER(mb_type
));
1879 prefetch_motion(h
, 0);
1881 if(IS_16X16(mb_type
)){
1882 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
1883 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
1884 &weight_op
[0], &weight_avg
[0],
1885 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1886 }else if(IS_16X8(mb_type
)){
1887 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
1888 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1889 &weight_op
[1], &weight_avg
[1],
1890 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1891 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
1892 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1893 &weight_op
[1], &weight_avg
[1],
1894 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1895 }else if(IS_8X16(mb_type
)){
1896 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
1897 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1898 &weight_op
[2], &weight_avg
[2],
1899 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1900 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
1901 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1902 &weight_op
[2], &weight_avg
[2],
1903 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1907 assert(IS_8X8(mb_type
));
1910 const int sub_mb_type
= h
->sub_mb_type
[i
];
1912 int x_offset
= (i
&1)<<2;
1913 int y_offset
= (i
&2)<<1;
1915 if(IS_SUB_8X8(sub_mb_type
)){
1916 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1917 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1918 &weight_op
[3], &weight_avg
[3],
1919 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1920 }else if(IS_SUB_8X4(sub_mb_type
)){
1921 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1922 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1923 &weight_op
[4], &weight_avg
[4],
1924 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1925 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
1926 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1927 &weight_op
[4], &weight_avg
[4],
1928 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1929 }else if(IS_SUB_4X8(sub_mb_type
)){
1930 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1931 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1932 &weight_op
[5], &weight_avg
[5],
1933 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1934 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
1935 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1936 &weight_op
[5], &weight_avg
[5],
1937 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1940 assert(IS_SUB_4X4(sub_mb_type
));
1942 int sub_x_offset
= x_offset
+ 2*(j
&1);
1943 int sub_y_offset
= y_offset
+ (j
&2);
1944 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
1945 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1946 &weight_op
[6], &weight_avg
[6],
1947 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1953 prefetch_motion(h
, 1);
1956 static av_cold
void decode_init_vlc(void){
1957 static int done
= 0;
1963 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
1964 &chroma_dc_coeff_token_len
[0], 1, 1,
1965 &chroma_dc_coeff_token_bits
[0], 1, 1, 1);
1968 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
1969 &coeff_token_len
[i
][0], 1, 1,
1970 &coeff_token_bits
[i
][0], 1, 1, 1);
1974 init_vlc(&chroma_dc_total_zeros_vlc
[i
], CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
1975 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
1976 &chroma_dc_total_zeros_bits
[i
][0], 1, 1, 1);
1978 for(i
=0; i
<15; i
++){
1979 init_vlc(&total_zeros_vlc
[i
], TOTAL_ZEROS_VLC_BITS
, 16,
1980 &total_zeros_len
[i
][0], 1, 1,
1981 &total_zeros_bits
[i
][0], 1, 1, 1);
1985 init_vlc(&run_vlc
[i
], RUN_VLC_BITS
, 7,
1986 &run_len
[i
][0], 1, 1,
1987 &run_bits
[i
][0], 1, 1, 1);
1989 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
1990 &run_len
[6][0], 1, 1,
1991 &run_bits
[6][0], 1, 1, 1);
1995 static void free_tables(H264Context
*h
){
1998 av_freep(&h
->intra4x4_pred_mode
);
1999 av_freep(&h
->chroma_pred_mode_table
);
2000 av_freep(&h
->cbp_table
);
2001 av_freep(&h
->mvd_table
[0]);
2002 av_freep(&h
->mvd_table
[1]);
2003 av_freep(&h
->direct_table
);
2004 av_freep(&h
->non_zero_count
);
2005 av_freep(&h
->slice_table_base
);
2006 h
->slice_table
= NULL
;
2008 av_freep(&h
->mb2b_xy
);
2009 av_freep(&h
->mb2b8_xy
);
2011 for(i
= 0; i
< MAX_SPS_COUNT
; i
++)
2012 av_freep(h
->sps_buffers
+ i
);
2014 for(i
= 0; i
< MAX_PPS_COUNT
; i
++)
2015 av_freep(h
->pps_buffers
+ i
);
2017 for(i
= 0; i
< h
->s
.avctx
->thread_count
; i
++) {
2018 hx
= h
->thread_context
[i
];
2020 av_freep(&hx
->top_borders
[1]);
2021 av_freep(&hx
->top_borders
[0]);
2022 av_freep(&hx
->s
.obmc_scratchpad
);
2026 static void init_dequant8_coeff_table(H264Context
*h
){
2028 const int transpose
= (h
->s
.dsp
.h264_idct8_add
!= ff_h264_idct8_add_c
); //FIXME ugly
2029 h
->dequant8_coeff
[0] = h
->dequant8_buffer
[0];
2030 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[1];
2032 for(i
=0; i
<2; i
++ ){
2033 if(i
&& !memcmp(h
->pps
.scaling_matrix8
[0], h
->pps
.scaling_matrix8
[1], 64*sizeof(uint8_t))){
2034 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[0];
2038 for(q
=0; q
<52; q
++){
2039 int shift
= ff_div6
[q
];
2040 int idx
= ff_rem6
[q
];
2042 h
->dequant8_coeff
[i
][q
][transpose
? (x
>>3)|((x
&7)<<3) : x
] =
2043 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
2044 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
2049 static void init_dequant4_coeff_table(H264Context
*h
){
2051 const int transpose
= (h
->s
.dsp
.h264_idct_add
!= ff_h264_idct_add_c
); //FIXME ugly
2052 for(i
=0; i
<6; i
++ ){
2053 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
2055 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
2056 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
2063 for(q
=0; q
<52; q
++){
2064 int shift
= ff_div6
[q
] + 2;
2065 int idx
= ff_rem6
[q
];
2067 h
->dequant4_coeff
[i
][q
][transpose
? (x
>>2)|((x
<<2)&0xF) : x
] =
2068 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
2069 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
2074 static void init_dequant_tables(H264Context
*h
){
2076 init_dequant4_coeff_table(h
);
2077 if(h
->pps
.transform_8x8_mode
)
2078 init_dequant8_coeff_table(h
);
2079 if(h
->sps
.transform_bypass
){
2082 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
2083 if(h
->pps
.transform_8x8_mode
)
2086 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
2093 * needs width/height
2095 static int alloc_tables(H264Context
*h
){
2096 MpegEncContext
* const s
= &h
->s
;
2097 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
2100 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
2106 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
2107 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
2109 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
2111 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t));
2112 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
2114 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
2115 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
2116 for(y
=0; y
<s
->mb_height
; y
++){
2117 for(x
=0; x
<s
->mb_width
; x
++){
2118 const int mb_xy
= x
+ y
*s
->mb_stride
;
2119 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
2120 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
2122 h
->mb2b_xy
[mb_xy
]= b_xy
;
2123 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
2127 s
->obmc_scratchpad
= NULL
;
2129 if(!h
->dequant4_coeff
[0])
2130 init_dequant_tables(h
);
2139 * Mimic alloc_tables(), but for every context thread.
2141 static void clone_tables(H264Context
*dst
, H264Context
*src
){
2142 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
;
2143 dst
->non_zero_count
= src
->non_zero_count
;
2144 dst
->slice_table
= src
->slice_table
;
2145 dst
->cbp_table
= src
->cbp_table
;
2146 dst
->mb2b_xy
= src
->mb2b_xy
;
2147 dst
->mb2b8_xy
= src
->mb2b8_xy
;
2148 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
2149 dst
->mvd_table
[0] = src
->mvd_table
[0];
2150 dst
->mvd_table
[1] = src
->mvd_table
[1];
2151 dst
->direct_table
= src
->direct_table
;
2153 dst
->s
.obmc_scratchpad
= NULL
;
2154 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
);
2159 * Allocate buffers which are not shared amongst multiple threads.
2161 static int context_init(H264Context
*h
){
2162 CHECKED_ALLOCZ(h
->top_borders
[0], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2163 CHECKED_ALLOCZ(h
->top_borders
[1], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2167 return -1; // free_tables will clean up for us
2170 static av_cold
void common_init(H264Context
*h
){
2171 MpegEncContext
* const s
= &h
->s
;
2173 s
->width
= s
->avctx
->width
;
2174 s
->height
= s
->avctx
->height
;
2175 s
->codec_id
= s
->avctx
->codec
->id
;
2177 ff_h264_pred_init(&h
->hpc
, s
->codec_id
);
2179 h
->dequant_coeff_pps
= -1;
2180 s
->unrestricted_mv
=1;
2181 s
->decode
=1; //FIXME
2183 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
2184 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
2187 static av_cold
int decode_init(AVCodecContext
*avctx
){
2188 H264Context
*h
= avctx
->priv_data
;
2189 MpegEncContext
* const s
= &h
->s
;
2191 MPV_decode_defaults(s
);
2196 s
->out_format
= FMT_H264
;
2197 s
->workaround_bugs
= avctx
->workaround_bugs
;
2200 // s->decode_mb= ff_h263_decode_mb;
2201 s
->quarter_sample
= 1;
2203 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
2207 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
2208 *(char *)avctx
->extradata
== 1){
2215 h
->thread_context
[0] = h
;
2219 static int frame_start(H264Context
*h
){
2220 MpegEncContext
* const s
= &h
->s
;
2223 if(MPV_frame_start(s
, s
->avctx
) < 0)
2225 ff_er_frame_start(s
);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230 * See decode_nal_units().
2232 s
->current_picture_ptr
->key_frame
= 0;
2234 assert(s
->linesize
&& s
->uvlinesize
);
2236 for(i
=0; i
<16; i
++){
2237 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2238 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2241 h
->block_offset
[16+i
]=
2242 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2243 h
->block_offset
[24+16+i
]=
2244 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
2249 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
2250 if(!h
->thread_context
[i
]->s
.obmc_scratchpad
)
2251 h
->thread_context
[i
]->s
.obmc_scratchpad
= av_malloc(16*2*s
->linesize
+ 8*2*s
->uvlinesize
);
2253 /* some macroblocks will be accessed before they're available */
2254 if(FRAME_MBAFF
|| s
->avctx
->thread_count
> 1)
2255 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2261 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int simple
){
2262 MpegEncContext
* const s
= &h
->s
;
2266 src_cb
-= uvlinesize
;
2267 src_cr
-= uvlinesize
;
2269 // There are two lines saved, the line above the the top macroblock of a pair,
2270 // and the line above the bottom macroblock
2271 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2272 for(i
=1; i
<17; i
++){
2273 h
->left_border
[i
]= src_y
[15+i
* linesize
];
2276 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
2277 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
2279 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2280 h
->left_border
[17 ]= h
->top_borders
[0][s
->mb_x
][16+7];
2281 h
->left_border
[17+9]= h
->top_borders
[0][s
->mb_x
][24+7];
2283 h
->left_border
[i
+17 ]= src_cb
[7+i
*uvlinesize
];
2284 h
->left_border
[i
+17+9]= src_cr
[7+i
*uvlinesize
];
2286 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
2287 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
2291 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
, int simple
){
2292 MpegEncContext
* const s
= &h
->s
;
2299 if(h
->deblocking_filter
== 2) {
2301 deblock_left
= h
->slice_table
[mb_xy
] == h
->slice_table
[mb_xy
- 1];
2302 deblock_top
= h
->slice_table
[mb_xy
] == h
->slice_table
[h
->top_mb_xy
];
2304 deblock_left
= (s
->mb_x
> 0);
2305 deblock_top
= (s
->mb_y
> 0);
2308 src_y
-= linesize
+ 1;
2309 src_cb
-= uvlinesize
+ 1;
2310 src_cr
-= uvlinesize
+ 1;
2312 #define XCHG(a,b,t,xchg)\
2319 for(i
= !deblock_top
; i
<17; i
++){
2320 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
2325 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2326 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2327 if(s
->mb_x
+1 < s
->mb_width
){
2328 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2332 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2334 for(i
= !deblock_top
; i
<9; i
++){
2335 XCHG(h
->left_border
[i
+17 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2336 XCHG(h
->left_border
[i
+17+9], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2340 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2341 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2346 static inline void backup_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
2347 MpegEncContext
* const s
= &h
->s
;
2350 src_y
-= 2 * linesize
;
2351 src_cb
-= 2 * uvlinesize
;
2352 src_cr
-= 2 * uvlinesize
;
2354 // There are two lines saved, the line above the the top macroblock of a pair,
2355 // and the line above the bottom macroblock
2356 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2357 h
->left_border
[1]= h
->top_borders
[1][s
->mb_x
][15];
2358 for(i
=2; i
<34; i
++){
2359 h
->left_border
[i
]= src_y
[15+i
* linesize
];
2362 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 32*linesize
);
2363 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+32*linesize
);
2364 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 33*linesize
);
2365 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+33*linesize
);
2367 if(!ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2368 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7];
2369 h
->left_border
[34+ 1]= h
->top_borders
[1][s
->mb_x
][16+7];
2370 h
->left_border
[34+18 ]= h
->top_borders
[0][s
->mb_x
][24+7];
2371 h
->left_border
[34+18+1]= h
->top_borders
[1][s
->mb_x
][24+7];
2372 for(i
=2; i
<18; i
++){
2373 h
->left_border
[i
+34 ]= src_cb
[7+i
*uvlinesize
];
2374 h
->left_border
[i
+34+18]= src_cr
[7+i
*uvlinesize
];
2376 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+16*uvlinesize
);
2377 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+16*uvlinesize
);
2378 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+17*uvlinesize
);
2379 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+17*uvlinesize
);
2383 static inline void xchg_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
2384 MpegEncContext
* const s
= &h
->s
;
2387 int deblock_left
= (s
->mb_x
> 0);
2388 int deblock_top
= (s
->mb_y
> 1);
2390 tprintf(s
->avctx
, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y
, src_cb
, src_cr
, linesize
, uvlinesize
);
2392 src_y
-= 2 * linesize
+ 1;
2393 src_cb
-= 2 * uvlinesize
+ 1;
2394 src_cr
-= 2 * uvlinesize
+ 1;
2396 #define XCHG(a,b,t,xchg)\
2403 for(i
= (!deblock_top
)<<1; i
<34; i
++){
2404 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
2409 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2410 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2411 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0), *(uint64_t*)(src_y
+1 +linesize
), temp64
, xchg
);
2412 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8), *(uint64_t*)(src_y
+9 +linesize
), temp64
, 1);
2413 if(s
->mb_x
+1 < s
->mb_width
){
2414 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2415 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
+1]), *(uint64_t*)(src_y
+17 +linesize
), temp64
, 1);
2419 if(!ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2421 for(i
= (!deblock_top
) << 1; i
<18; i
++){
2422 XCHG(h
->left_border
[i
+34 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2423 XCHG(h
->left_border
[i
+34+18], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2427 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2428 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2429 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1 +uvlinesize
), temp64
, 1);
2430 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1 +uvlinesize
), temp64
, 1);
2435 static av_always_inline
void hl_decode_mb_internal(H264Context
*h
, int simple
){
2436 MpegEncContext
* const s
= &h
->s
;
2437 const int mb_x
= s
->mb_x
;
2438 const int mb_y
= s
->mb_y
;
2439 const int mb_xy
= h
->mb_xy
;
2440 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2441 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
2442 int linesize
, uvlinesize
/*dct_offset*/;
2444 int *block_offset
= &h
->block_offset
[0];
2445 const unsigned int bottom
= mb_y
& 1;
2446 const int transform_bypass
= (s
->qscale
== 0 && h
->sps
.transform_bypass
), is_h264
= (simple
|| s
->codec_id
== CODEC_ID_H264
);
2447 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2448 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2450 dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
2451 dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2452 dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2454 s
->dsp
.prefetch(dest_y
+ (s
->mb_x
&3)*4*s
->linesize
+ 64, s
->linesize
, 4);
2455 s
->dsp
.prefetch(dest_cb
+ (s
->mb_x
&7)*s
->uvlinesize
+ 64, dest_cr
- dest_cb
, 2);
2457 if (!simple
&& MB_FIELD
) {
2458 linesize
= h
->mb_linesize
= s
->linesize
* 2;
2459 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
2460 block_offset
= &h
->block_offset
[24];
2461 if(mb_y
&1){ //FIXME move out of this func?
2462 dest_y
-= s
->linesize
*15;
2463 dest_cb
-= s
->uvlinesize
*7;
2464 dest_cr
-= s
->uvlinesize
*7;
2468 for(list
=0; list
<h
->list_count
; list
++){
2469 if(!USES_LIST(mb_type
, list
))
2471 if(IS_16X16(mb_type
)){
2472 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
2473 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
2475 for(i
=0; i
<16; i
+=4){
2476 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2477 int ref
= h
->ref_cache
[list
][scan8
[i
]];
2479 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
2485 linesize
= h
->mb_linesize
= s
->linesize
;
2486 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
2487 // dct_offset = s->linesize * 16;
2490 if(transform_bypass
){
2492 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
2493 }else if(IS_8x8DCT(mb_type
)){
2494 idct_dc_add
= s
->dsp
.h264_idct8_dc_add
;
2495 idct_add
= s
->dsp
.h264_idct8_add
;
2497 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2498 idct_add
= s
->dsp
.h264_idct_add
;
2501 if(!simple
&& FRAME_MBAFF
&& h
->deblocking_filter
&& IS_INTRA(mb_type
)
2502 && (!bottom
|| !IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]))){
2503 int mbt_y
= mb_y
&~1;
2504 uint8_t *top_y
= s
->current_picture
.data
[0] + (mbt_y
* 16* s
->linesize
) + mb_x
* 16;
2505 uint8_t *top_cb
= s
->current_picture
.data
[1] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2506 uint8_t *top_cr
= s
->current_picture
.data
[2] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2507 xchg_pair_border(h
, top_y
, top_cb
, top_cr
, s
->linesize
, s
->uvlinesize
, 1);
2510 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
2513 // The pixels are stored in h->mb array in the same order as levels,
2514 // copy them in output in the correct order.
2515 for(i
=0; i
<16; i
++) {
2516 for (y
=0; y
<4; y
++) {
2517 for (x
=0; x
<4; x
++) {
2518 *(dest_y
+ block_offset
[i
] + y
*linesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2522 for(i
=16; i
<16+4; i
++) {
2523 for (y
=0; y
<4; y
++) {
2524 for (x
=0; x
<4; x
++) {
2525 *(dest_cb
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2529 for(i
=20; i
<20+4; i
++) {
2530 for (y
=0; y
<4; y
++) {
2531 for (x
=0; x
<4; x
++) {
2532 *(dest_cr
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2537 if(IS_INTRA(mb_type
)){
2538 if(h
->deblocking_filter
&& (simple
|| !FRAME_MBAFF
))
2539 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1, simple
);
2541 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2542 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
2543 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
2546 if(IS_INTRA4x4(mb_type
)){
2547 if(simple
|| !s
->encoding
){
2548 if(IS_8x8DCT(mb_type
)){
2549 for(i
=0; i
<16; i
+=4){
2550 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2551 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2552 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2553 h
->hpc
.pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
2554 (h
->topright_samples_available
<<i
)&0x4000, linesize
);
2556 if(nnz
== 1 && h
->mb
[i
*16])
2557 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2559 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2563 for(i
=0; i
<16; i
++){
2564 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2566 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2569 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
2570 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
2571 assert(mb_y
|| linesize
<= block_offset
[i
]);
2572 if(!topright_avail
){
2573 tr
= ptr
[3 - linesize
]*0x01010101;
2574 topright
= (uint8_t*) &tr
;
2576 topright
= ptr
+ 4 - linesize
;
2580 h
->hpc
.pred4x4
[ dir
](ptr
, topright
, linesize
);
2581 nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2584 if(nnz
== 1 && h
->mb
[i
*16])
2585 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2587 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2589 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
2594 h
->hpc
.pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
2596 if(!transform_bypass
)
2597 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
, h
->dequant4_coeff
[0][s
->qscale
][0]);
2599 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
2601 if(h
->deblocking_filter
&& (simple
|| !FRAME_MBAFF
))
2602 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0, simple
);
2604 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
2605 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
2606 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
2607 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
2611 if(!IS_INTRA4x4(mb_type
)){
2613 if(IS_INTRA16x16(mb_type
)){
2614 for(i
=0; i
<16; i
++){
2615 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2616 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2617 else if(h
->mb
[i
*16])
2618 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2621 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
2622 for(i
=0; i
<16; i
+=di
){
2623 int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2625 if(nnz
==1 && h
->mb
[i
*16])
2626 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2628 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2633 for(i
=0; i
<16; i
++){
2634 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
2635 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2636 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
2642 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2643 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
2644 if(transform_bypass
){
2645 idct_add
= idct_dc_add
= s
->dsp
.add_pixels4
;
2647 idct_add
= s
->dsp
.h264_idct_add
;
2648 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2649 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
[0], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1:4][h
->chroma_qp
[0]][0]);
2650 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
[1], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2:5][h
->chroma_qp
[1]][0]);
2653 for(i
=16; i
<16+8; i
++){
2654 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2655 idct_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2656 else if(h
->mb
[i
*16])
2657 idct_dc_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2660 for(i
=16; i
<16+8; i
++){
2661 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
2662 uint8_t * const ptr
= dest
[(i
&4)>>2] + block_offset
[i
];
2663 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
2669 if(h
->deblocking_filter
) {
2670 if (!simple
&& FRAME_MBAFF
) {
2671 //FIXME try deblocking one mb at a time?
2672 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2673 const int mb_y
= s
->mb_y
- 1;
2674 uint8_t *pair_dest_y
, *pair_dest_cb
, *pair_dest_cr
;
2675 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
2676 const int mb_type_top
= s
->current_picture
.mb_type
[mb_xy
];
2677 const int mb_type_bottom
= s
->current_picture
.mb_type
[mb_xy
+s
->mb_stride
];
2678 if (!bottom
) return;
2679 pair_dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
2680 pair_dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2681 pair_dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2683 if(IS_INTRA(mb_type_top
| mb_type_bottom
))
2684 xchg_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
, 0);
2686 backup_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
);
2689 s
->mb_y
--; h
->mb_xy
-= s
->mb_stride
;
2690 tprintf(h
->s
.avctx
, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x
, mb_y
, pair_dest_y
, dest_y
);
2691 fill_caches(h
, mb_type_top
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2692 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
]);
2693 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
]);
2694 filter_mb(h
, mb_x
, mb_y
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, linesize
, uvlinesize
);
2696 s
->mb_y
++; h
->mb_xy
+= s
->mb_stride
;
2697 tprintf(h
->s
.avctx
, "call mbaff filter_mb\n");
2698 fill_caches(h
, mb_type_bottom
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2699 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
+s
->mb_stride
]);
2700 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
+s
->mb_stride
]);
2701 filter_mb(h
, mb_x
, mb_y
+1, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2703 tprintf(h
->s
.avctx
, "call filter_mb\n");
2704 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, simple
);
2705 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2706 filter_mb_fast(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2712 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2714 static void hl_decode_mb_simple(H264Context
*h
){
2715 hl_decode_mb_internal(h
, 1);
2719 * Process a macroblock; this handles edge cases, such as interlacing.
2721 static void av_noinline
hl_decode_mb_complex(H264Context
*h
){
2722 hl_decode_mb_internal(h
, 0);
2725 static void hl_decode_mb(H264Context
*h
){
2726 MpegEncContext
* const s
= &h
->s
;
2727 const int mb_xy
= h
->mb_xy
;
2728 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2729 int is_complex
= FRAME_MBAFF
|| MB_FIELD
|| IS_INTRA_PCM(mb_type
) || s
->codec_id
!= CODEC_ID_H264
|| (ENABLE_GRAY
&& (s
->flags
&CODEC_FLAG_GRAY
)) || s
->encoding
;
2735 hl_decode_mb_complex(h
);
2736 else hl_decode_mb_simple(h
);
2739 static void pic_as_field(Picture
*pic
, const int parity
){
2741 for (i
= 0; i
< 4; ++i
) {
2742 if (parity
== PICT_BOTTOM_FIELD
)
2743 pic
->data
[i
] += pic
->linesize
[i
];
2744 pic
->reference
= parity
;
2745 pic
->linesize
[i
] *= 2;
2749 static int split_field_copy(Picture
*dest
, Picture
*src
,
2750 int parity
, int id_add
){
2751 int match
= !!(src
->reference
& parity
);
2755 pic_as_field(dest
, parity
);
2757 dest
->pic_id
+= id_add
;
2764 * Split one reference list into field parts, interleaving by parity
2765 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2766 * set to look at the actual start of data for that field.
2768 * @param dest output list
2769 * @param dest_len maximum number of fields to put in dest
2770 * @param src the source reference list containing fields and/or field pairs
2771 * (aka short_ref/long_ref, or
2772 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2773 * @param src_len number of Picture's in source (pairs and unmatched fields)
2774 * @param parity the parity of the picture being decoded/needing
2775 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2776 * @return number of fields placed in dest
2778 static int split_field_half_ref_list(Picture
*dest
, int dest_len
,
2779 Picture
*src
, int src_len
, int parity
){
2780 int same_parity
= 1;
2786 for (out_i
= 0; out_i
< dest_len
; out_i
+= field_output
) {
2787 if (same_parity
&& same_i
< src_len
) {
2788 field_output
= split_field_copy(dest
+ out_i
, src
+ same_i
,
2790 same_parity
= !field_output
;
2793 } else if (opp_i
< src_len
) {
2794 field_output
= split_field_copy(dest
+ out_i
, src
+ opp_i
,
2795 PICT_FRAME
- parity
, 0);
2796 same_parity
= field_output
;
2808 * Split the reference frame list into a reference field list.
2809 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2810 * The input list contains both reference field pairs and
2811 * unmatched reference fields; it is ordered as spec describes
2812 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2813 * unmatched field pairs are also present. Conceptually this is equivalent
2814 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2816 * @param dest output reference list where ordered fields are to be placed
2817 * @param dest_len max number of fields to place at dest
2818 * @param src source reference list, as described above
2819 * @param src_len number of pictures (pairs and unmatched fields) in src
2820 * @param parity parity of field being currently decoded
2821 * (one of PICT_{TOP,BOTTOM}_FIELD)
2822 * @param long_i index into src array that holds first long reference picture,
2823 * or src_len if no long refs present.
2825 static int split_field_ref_list(Picture
*dest
, int dest_len
,
2826 Picture
*src
, int src_len
,
2827 int parity
, int long_i
){
2829 int i
= split_field_half_ref_list(dest
, dest_len
, src
, long_i
, parity
);
2833 i
+= split_field_half_ref_list(dest
, dest_len
, src
+ long_i
,
2834 src_len
- long_i
, parity
);
2839 * fills the default_ref_list.
2841 static int fill_default_ref_list(H264Context
*h
){
2842 MpegEncContext
* const s
= &h
->s
;
2844 int smallest_poc_greater_than_current
= -1;
2846 Picture sorted_short_ref
[32];
2847 Picture field_entry_list
[2][32];
2848 Picture
*frame_list
[2];
2850 if (FIELD_PICTURE
) {
2851 structure_sel
= PICT_FRAME
;
2852 frame_list
[0] = field_entry_list
[0];
2853 frame_list
[1] = field_entry_list
[1];
2856 frame_list
[0] = h
->default_ref_list
[0];
2857 frame_list
[1] = h
->default_ref_list
[1];
2860 if(h
->slice_type
==FF_B_TYPE
){
2867 /* sort frame according to poc in B slice */
2868 for(out_i
=0; out_i
<h
->short_ref_count
; out_i
++){
2870 int best_poc
=INT_MAX
;
2872 for(i
=0; i
<h
->short_ref_count
; i
++){
2873 const int poc
= h
->short_ref
[i
]->poc
;
2874 if(poc
> limit
&& poc
< best_poc
){
2880 assert(best_i
!= INT_MIN
);
2883 sorted_short_ref
[out_i
]= *h
->short_ref
[best_i
];
2884 tprintf(h
->s
.avctx
, "sorted poc: %d->%d poc:%d fn:%d\n", best_i
, out_i
, sorted_short_ref
[out_i
].poc
, sorted_short_ref
[out_i
].frame_num
);
2885 if (-1 == smallest_poc_greater_than_current
) {
2886 if (h
->short_ref
[best_i
]->poc
>= s
->current_picture_ptr
->poc
) {
2887 smallest_poc_greater_than_current
= out_i
;
2892 tprintf(h
->s
.avctx
, "current poc: %d, smallest_poc_greater_than_current: %d\n", s
->current_picture_ptr
->poc
, smallest_poc_greater_than_current
);
2894 // find the largest poc
2895 for(list
=0; list
<2; list
++){
2898 int step
= list
? -1 : 1;
2900 for(i
=0; i
<h
->short_ref_count
&& index
< h
->ref_count
[list
]; i
++, j
+=step
) {
2902 while(j
<0 || j
>= h
->short_ref_count
){
2903 if(j
!= -99 && step
== (list
? -1 : 1))
2906 j
= smallest_poc_greater_than_current
+ (step
>>1);
2908 sel
= sorted_short_ref
[j
].reference
| structure_sel
;
2909 if(sel
!= PICT_FRAME
) continue;
2910 frame_list
[list
][index
]= sorted_short_ref
[j
];
2911 frame_list
[list
][index
++].pic_id
= sorted_short_ref
[j
].frame_num
;
2913 short_len
[list
] = index
;
2915 for(i
= 0; i
< 16 && index
< h
->ref_count
[ list
]; i
++){
2917 if(h
->long_ref
[i
] == NULL
) continue;
2918 sel
= h
->long_ref
[i
]->reference
| structure_sel
;
2919 if(sel
!= PICT_FRAME
) continue;
2921 frame_list
[ list
][index
]= *h
->long_ref
[i
];
2922 frame_list
[ list
][index
++].pic_id
= i
;
2927 for(list
=0; list
<2; list
++){
2929 len
[list
] = split_field_ref_list(h
->default_ref_list
[list
],
2933 s
->picture_structure
,
2936 // swap the two first elements of L1 when L0 and L1 are identical
2937 if(list
&& len
[0] > 1 && len
[0] == len
[1])
2938 for(i
=0; h
->default_ref_list
[0][i
].data
[0] == h
->default_ref_list
[1][i
].data
[0]; i
++)
2940 FFSWAP(Picture
, h
->default_ref_list
[1][0], h
->default_ref_list
[1][1]);
2944 if(len
[list
] < h
->ref_count
[ list
])
2945 memset(&h
->default_ref_list
[list
][len
[list
]], 0, sizeof(Picture
)*(h
->ref_count
[ list
] - len
[list
]));
2952 for(i
=0; i
<h
->short_ref_count
; i
++){
2954 sel
= h
->short_ref
[i
]->reference
| structure_sel
;
2955 if(sel
!= PICT_FRAME
) continue;
2956 frame_list
[0][index
]= *h
->short_ref
[i
];
2957 frame_list
[0][index
++].pic_id
= h
->short_ref
[i
]->frame_num
;
2960 for(i
= 0; i
< 16; i
++){
2962 if(h
->long_ref
[i
] == NULL
) continue;
2963 sel
= h
->long_ref
[i
]->reference
| structure_sel
;
2964 if(sel
!= PICT_FRAME
) continue;
2965 frame_list
[0][index
]= *h
->long_ref
[i
];
2966 frame_list
[0][index
++].pic_id
= i
;
2970 index
= split_field_ref_list(h
->default_ref_list
[0],
2971 h
->ref_count
[0], frame_list
[0],
2972 index
, s
->picture_structure
,
2975 if(index
< h
->ref_count
[0])
2976 memset(&h
->default_ref_list
[0][index
], 0, sizeof(Picture
)*(h
->ref_count
[0] - index
));
2979 for (i
=0; i
<h
->ref_count
[0]; i
++) {
2980 tprintf(h
->s
.avctx
, "List0: %s fn:%d 0x%p\n", (h
->default_ref_list
[0][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[0][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
2982 if(h
->slice_type
==FF_B_TYPE
){
2983 for (i
=0; i
<h
->ref_count
[1]; i
++) {
2984 tprintf(h
->s
.avctx
, "List1: %s fn:%d 0x%p\n", (h
->default_ref_list
[1][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[1][i
].pic_id
, h
->default_ref_list
[1][i
].data
[0]);
2991 static void print_short_term(H264Context
*h
);
2992 static void print_long_term(H264Context
*h
);
2995 * Extract structure information about the picture described by pic_num in
2996 * the current decoding context (frame or field). Note that pic_num is
2997 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2998 * @param pic_num picture number for which to extract structure information
2999 * @param structure one of PICT_XXX describing structure of picture
3001 * @return frame number (short term) or long term index of picture
3002 * described by pic_num
3004 static int pic_num_extract(H264Context
*h
, int pic_num
, int *structure
){
3005 MpegEncContext
* const s
= &h
->s
;
3007 *structure
= s
->picture_structure
;
3010 /* opposite field */
3011 *structure
^= PICT_FRAME
;
3018 static int decode_ref_pic_list_reordering(H264Context
*h
){
3019 MpegEncContext
* const s
= &h
->s
;
3020 int list
, index
, pic_structure
;
3022 print_short_term(h
);
3024 if(h
->slice_type
==FF_I_TYPE
|| h
->slice_type
==FF_SI_TYPE
) return 0; //FIXME move before func
3026 for(list
=0; list
<h
->list_count
; list
++){
3027 memcpy(h
->ref_list
[list
], h
->default_ref_list
[list
], sizeof(Picture
)*h
->ref_count
[list
]);
3029 if(get_bits1(&s
->gb
)){
3030 int pred
= h
->curr_pic_num
;
3032 for(index
=0; ; index
++){
3033 unsigned int reordering_of_pic_nums_idc
= get_ue_golomb(&s
->gb
);
3034 unsigned int pic_id
;
3036 Picture
*ref
= NULL
;
3038 if(reordering_of_pic_nums_idc
==3)
3041 if(index
>= h
->ref_count
[list
]){
3042 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference count overflow\n");
3046 if(reordering_of_pic_nums_idc
<3){
3047 if(reordering_of_pic_nums_idc
<2){
3048 const unsigned int abs_diff_pic_num
= get_ue_golomb(&s
->gb
) + 1;
3051 if(abs_diff_pic_num
> h
->max_pic_num
){
3052 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "abs_diff_pic_num overflow\n");
3056 if(reordering_of_pic_nums_idc
== 0) pred
-= abs_diff_pic_num
;
3057 else pred
+= abs_diff_pic_num
;
3058 pred
&= h
->max_pic_num
- 1;
3060 frame_num
= pic_num_extract(h
, pred
, &pic_structure
);
3062 for(i
= h
->short_ref_count
-1; i
>=0; i
--){
3063 ref
= h
->short_ref
[i
];
3064 assert(ref
->reference
);
3065 assert(!ref
->long_ref
);
3066 if(ref
->data
[0] != NULL
&&
3067 ref
->frame_num
== frame_num
&&
3068 (ref
->reference
& pic_structure
) &&
3069 ref
->long_ref
== 0) // ignore non existing pictures by testing data[0] pointer
3076 pic_id
= get_ue_golomb(&s
->gb
); //long_term_pic_idx
3078 long_idx
= pic_num_extract(h
, pic_id
, &pic_structure
);
3081 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "long_term_pic_idx overflow\n");
3084 ref
= h
->long_ref
[long_idx
];
3085 assert(!(ref
&& !ref
->reference
));
3086 if(ref
&& (ref
->reference
& pic_structure
)){
3087 ref
->pic_id
= pic_id
;
3088 assert(ref
->long_ref
);
3096 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference picture missing during reorder\n");
3097 memset(&h
->ref_list
[list
][index
], 0, sizeof(Picture
)); //FIXME
3099 for(i
=index
; i
+1<h
->ref_count
[list
]; i
++){
3100 if(ref
->long_ref
== h
->ref_list
[list
][i
].long_ref
&& ref
->pic_id
== h
->ref_list
[list
][i
].pic_id
)
3103 for(; i
> index
; i
--){
3104 h
->ref_list
[list
][i
]= h
->ref_list
[list
][i
-1];
3106 h
->ref_list
[list
][index
]= *ref
;
3108 pic_as_field(&h
->ref_list
[list
][index
], pic_structure
);
3112 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal reordering_of_pic_nums_idc\n");
3118 for(list
=0; list
<h
->list_count
; list
++){
3119 for(index
= 0; index
< h
->ref_count
[list
]; index
++){
3120 if(!h
->ref_list
[list
][index
].data
[0])
3121 h
->ref_list
[list
][index
]= s
->current_picture
;
3125 if(h
->slice_type
==FF_B_TYPE
&& !h
->direct_spatial_mv_pred
)
3126 direct_dist_scale_factor(h
);
3127 direct_ref_list_init(h
);
3131 static void fill_mbaff_ref_list(H264Context
*h
){
3133 for(list
=0; list
<2; list
++){ //FIXME try list_count
3134 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3135 Picture
*frame
= &h
->ref_list
[list
][i
];
3136 Picture
*field
= &h
->ref_list
[list
][16+2*i
];
3139 field
[0].linesize
[j
] <<= 1;
3140 field
[0].reference
= PICT_TOP_FIELD
;
3141 field
[1] = field
[0];
3143 field
[1].data
[j
] += frame
->linesize
[j
];
3144 field
[1].reference
= PICT_BOTTOM_FIELD
;
3146 h
->luma_weight
[list
][16+2*i
] = h
->luma_weight
[list
][16+2*i
+1] = h
->luma_weight
[list
][i
];
3147 h
->luma_offset
[list
][16+2*i
] = h
->luma_offset
[list
][16+2*i
+1] = h
->luma_offset
[list
][i
];
3149 h
->chroma_weight
[list
][16+2*i
][j
] = h
->chroma_weight
[list
][16+2*i
+1][j
] = h
->chroma_weight
[list
][i
][j
];
3150 h
->chroma_offset
[list
][16+2*i
][j
] = h
->chroma_offset
[list
][16+2*i
+1][j
] = h
->chroma_offset
[list
][i
][j
];
3154 for(j
=0; j
<h
->ref_count
[1]; j
++){
3155 for(i
=0; i
<h
->ref_count
[0]; i
++)
3156 h
->implicit_weight
[j
][16+2*i
] = h
->implicit_weight
[j
][16+2*i
+1] = h
->implicit_weight
[j
][i
];
3157 memcpy(h
->implicit_weight
[16+2*j
], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3158 memcpy(h
->implicit_weight
[16+2*j
+1], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3162 static int pred_weight_table(H264Context
*h
){
3163 MpegEncContext
* const s
= &h
->s
;
3165 int luma_def
, chroma_def
;
3168 h
->use_weight_chroma
= 0;
3169 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3170 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3171 luma_def
= 1<<h
->luma_log2_weight_denom
;
3172 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
3174 for(list
=0; list
<2; list
++){
3175 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3176 int luma_weight_flag
, chroma_weight_flag
;
3178 luma_weight_flag
= get_bits1(&s
->gb
);
3179 if(luma_weight_flag
){
3180 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
3181 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
3182 if( h
->luma_weight
[list
][i
] != luma_def
3183 || h
->luma_offset
[list
][i
] != 0)
3186 h
->luma_weight
[list
][i
]= luma_def
;
3187 h
->luma_offset
[list
][i
]= 0;
3190 chroma_weight_flag
= get_bits1(&s
->gb
);
3191 if(chroma_weight_flag
){
3194 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3195 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3196 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
3197 || h
->chroma_offset
[list
][i
][j
] != 0)
3198 h
->use_weight_chroma
= 1;
3203 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3204 h
->chroma_offset
[list
][i
][j
]= 0;
3208 if(h
->slice_type
!= FF_B_TYPE
) break;
3210 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3214 static void implicit_weight_table(H264Context
*h
){
3215 MpegEncContext
* const s
= &h
->s
;
3217 int cur_poc
= s
->current_picture_ptr
->poc
;
3219 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3220 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3222 h
->use_weight_chroma
= 0;
3227 h
->use_weight_chroma
= 2;
3228 h
->luma_log2_weight_denom
= 5;
3229 h
->chroma_log2_weight_denom
= 5;
3231 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3232 int poc0
= h
->ref_list
[0][ref0
].poc
;
3233 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3234 int poc1
= h
->ref_list
[1][ref1
].poc
;
3235 int td
= av_clip(poc1
- poc0
, -128, 127);
3237 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
3238 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
3239 int dist_scale_factor
= av_clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3240 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3241 h
->implicit_weight
[ref0
][ref1
] = 32;
3243 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3245 h
->implicit_weight
[ref0
][ref1
] = 32;
3251 * Mark a picture as no longer needed for reference. The refmask
3252 * argument allows unreferencing of individual fields or the whole frame.
3253 * If the picture becomes entirely unreferenced, but is being held for
3254 * display purposes, it is marked as such.
3255 * @param refmask mask of fields to unreference; the mask is bitwise
3256 * anded with the reference marking of pic
3257 * @return non-zero if pic becomes entirely unreferenced (except possibly
3258 * for display purposes) zero if one of the fields remains in
3261 static inline int unreference_pic(H264Context
*h
, Picture
*pic
, int refmask
){
3263 if (pic
->reference
&= refmask
) {
3266 if(pic
== h
->delayed_output_pic
)
3267 pic
->reference
=DELAYED_PIC_REF
;
3269 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3270 if(pic
== h
->delayed_pic
[i
]){
3271 pic
->reference
=DELAYED_PIC_REF
;
3280 * instantaneous decoder refresh.
3282 static void idr(H264Context
*h
){
3285 for(i
=0; i
<16; i
++){
3286 if (h
->long_ref
[i
] != NULL
) {
3287 unreference_pic(h
, h
->long_ref
[i
], 0);
3288 h
->long_ref
[i
]= NULL
;
3291 h
->long_ref_count
=0;
3293 for(i
=0; i
<h
->short_ref_count
; i
++){
3294 unreference_pic(h
, h
->short_ref
[i
], 0);
3295 h
->short_ref
[i
]= NULL
;
3297 h
->short_ref_count
=0;
3300 /* forget old pics after a seek */
3301 static void flush_dpb(AVCodecContext
*avctx
){
3302 H264Context
*h
= avctx
->priv_data
;
3304 for(i
=0; i
<16; i
++) {
3305 if(h
->delayed_pic
[i
])
3306 h
->delayed_pic
[i
]->reference
= 0;
3307 h
->delayed_pic
[i
]= NULL
;
3309 if(h
->delayed_output_pic
)
3310 h
->delayed_output_pic
->reference
= 0;
3311 h
->delayed_output_pic
= NULL
;
3313 if(h
->s
.current_picture_ptr
)
3314 h
->s
.current_picture_ptr
->reference
= 0;
3315 h
->s
.first_field
= 0;
3316 ff_mpeg_flush(avctx
);
3320 * Find a Picture in the short term reference list by frame number.
3321 * @param frame_num frame number to search for
3322 * @param idx the index into h->short_ref where returned picture is found
3323 * undefined if no picture found.
3324 * @return pointer to the found picture, or NULL if no pic with the provided
3325 * frame number is found
3327 static Picture
* find_short(H264Context
*h
, int frame_num
, int *idx
){
3328 MpegEncContext
* const s
= &h
->s
;
3331 for(i
=0; i
<h
->short_ref_count
; i
++){
3332 Picture
*pic
= h
->short_ref
[i
];
3333 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3334 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3335 if(pic
->frame_num
== frame_num
) {
3344 * Remove a picture from the short term reference list by its index in
3345 * that list. This does no checking on the provided index; it is assumed
3346 * to be valid. Other list entries are shifted down.
3347 * @param i index into h->short_ref of picture to remove.
3349 static void remove_short_at_index(H264Context
*h
, int i
){
3350 assert(i
> 0 && i
< h
->short_ref_count
);
3351 h
->short_ref
[i
]= NULL
;
3352 if (--h
->short_ref_count
)
3353 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
)*sizeof(Picture
*));
3358 * @return the removed picture or NULL if an error occurs
3360 static Picture
* remove_short(H264Context
*h
, int frame_num
){
3361 MpegEncContext
* const s
= &h
->s
;
3365 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3366 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3368 pic
= find_short(h
, frame_num
, &i
);
3370 remove_short_at_index(h
, i
);
3376 * Remove a picture from the long term reference list by its index in
3377 * that list. This does no checking on the provided index; it is assumed
3378 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3379 * @param i index into h->long_ref of picture to remove.
3381 static void remove_long_at_index(H264Context
*h
, int i
){
3382 h
->long_ref
[i
]= NULL
;
3383 h
->long_ref_count
--;
3388 * @return the removed picture or NULL if an error occurs
3390 static Picture
* remove_long(H264Context
*h
, int i
){
3393 pic
= h
->long_ref
[i
];
3395 remove_long_at_index(h
, i
);
3401 * print short term list
3403 static void print_short_term(H264Context
*h
) {
3405 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3406 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3407 for(i
=0; i
<h
->short_ref_count
; i
++){
3408 Picture
*pic
= h
->short_ref
[i
];
3409 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3415 * print long term list
3417 static void print_long_term(H264Context
*h
) {
3419 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3420 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3421 for(i
= 0; i
< 16; i
++){
3422 Picture
*pic
= h
->long_ref
[i
];
3424 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3431 * Executes the reference picture marking (memory management control operations).
3433 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3434 MpegEncContext
* const s
= &h
->s
;
3436 int current_ref_assigned
=0;
3439 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3440 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3442 for(i
=0; i
<mmco_count
; i
++){
3443 int structure
, frame_num
, unref_pic
;
3444 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3445 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_pic_num
, h
->mmco
[i
].long_arg
);
3447 switch(mmco
[i
].opcode
){
3448 case MMCO_SHORT2UNUSED
:
3449 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3450 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short %d count %d\n", h
->mmco
[i
].short_pic_num
, h
->short_ref_count
);
3451 frame_num
= pic_num_extract(h
, mmco
[i
].short_pic_num
, &structure
);
3452 pic
= find_short(h
, frame_num
, &j
);
3454 if (unreference_pic(h
, pic
, structure
^ PICT_FRAME
))
3455 remove_short_at_index(h
, j
);
3456 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3457 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short failure\n");
3459 case MMCO_SHORT2LONG
:
3460 if (FIELD_PICTURE
&& mmco
[i
].long_arg
< h
->long_ref_count
&&
3461 h
->long_ref
[mmco
[i
].long_arg
]->frame_num
==
3462 mmco
[i
].short_pic_num
/ 2) {
3463 /* do nothing, we've already moved this field pair. */
3465 int frame_num
= mmco
[i
].short_pic_num
>> FIELD_PICTURE
;
3467 pic
= remove_long(h
, mmco
[i
].long_arg
);
3468 if(pic
) unreference_pic(h
, pic
, 0);
3470 h
->long_ref
[ mmco
[i
].long_arg
]= remove_short(h
, frame_num
);
3471 if (h
->long_ref
[ mmco
[i
].long_arg
]){
3472 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3473 h
->long_ref_count
++;
3477 case MMCO_LONG2UNUSED
:
3478 j
= pic_num_extract(h
, mmco
[i
].long_arg
, &structure
);
3479 pic
= h
->long_ref
[j
];
3481 if (unreference_pic(h
, pic
, structure
^ PICT_FRAME
))
3482 remove_long_at_index(h
, j
);
3483 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3484 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref long failure\n");
3488 if (FIELD_PICTURE
&& !s
->first_field
) {
3489 if (h
->long_ref
[mmco
[i
].long_arg
] == s
->current_picture_ptr
) {
3490 /* Just mark second field as referenced */
3492 } else if (s
->current_picture_ptr
->reference
) {
3493 /* First field in pair is in short term list or
3494 * at a different long term index.
3495 * This is not allowed; see 7.4.3, notes 2 and 3.
3496 * Report the problem and keep the pair where it is,
3497 * and mark this field valid.
3499 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3500 "illegal long term reference assignment for second "
3501 "field in complementary field pair (first field is "
3502 "short term or has non-matching long index)\n");
3508 pic
= remove_long(h
, mmco
[i
].long_arg
);
3509 if(pic
) unreference_pic(h
, pic
, 0);
3511 h
->long_ref
[ mmco
[i
].long_arg
]= s
->current_picture_ptr
;
3512 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3513 h
->long_ref_count
++;
3516 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3517 current_ref_assigned
=1;
3519 case MMCO_SET_MAX_LONG
:
3520 assert(mmco
[i
].long_arg
<= 16);
3521 // just remove the long term which index is greater than new max
3522 for(j
= mmco
[i
].long_arg
; j
<16; j
++){
3523 pic
= remove_long(h
, j
);
3524 if (pic
) unreference_pic(h
, pic
, 0);
3528 while(h
->short_ref_count
){
3529 pic
= remove_short(h
, h
->short_ref
[0]->frame_num
);
3530 if(pic
) unreference_pic(h
, pic
, 0);
3532 for(j
= 0; j
< 16; j
++) {
3533 pic
= remove_long(h
, j
);
3534 if(pic
) unreference_pic(h
, pic
, 0);
3541 if (!current_ref_assigned
&& FIELD_PICTURE
&&
3542 !s
->first_field
&& s
->current_picture_ptr
->reference
) {
3544 /* Second field of complementary field pair; the first field of
3545 * which is already referenced. If short referenced, it
3546 * should be first entry in short_ref. If not, it must exist
3547 * in long_ref; trying to put it on the short list here is an
3548 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3550 if (h
->short_ref_count
&& h
->short_ref
[0] == s
->current_picture_ptr
) {
3551 /* Just mark the second field valid */
3552 s
->current_picture_ptr
->reference
= PICT_FRAME
;
3553 } else if (s
->current_picture_ptr
->long_ref
) {
3554 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term reference "
3555 "assignment for second field "
3556 "in complementary field pair "
3557 "(first field is long term)\n");
3560 * First field in reference, but not in any sensible place on our
3561 * reference lists. This shouldn't happen unless reference
3562 * handling somewhere else is wrong.
3566 current_ref_assigned
= 1;
3569 if(!current_ref_assigned
){
3570 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
);
3572 unreference_pic(h
, pic
, 0);
3573 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3576 if(h
->short_ref_count
)
3577 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3579 h
->short_ref
[0]= s
->current_picture_ptr
;
3580 h
->short_ref
[0]->long_ref
=0;
3581 h
->short_ref_count
++;
3582 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3585 if (h
->long_ref_count
+ h
->short_ref_count
> h
->sps
.ref_frame_count
){
3587 /* We have too many reference frames, probably due to corrupted
3588 * stream. Need to discard one frame. Prevents overrun of the
3589 * short_ref and long_ref buffers.
3591 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3592 "number of reference frames exceeds max (probably "
3593 "corrupt input), discarding one\n");
3595 if (h
->long_ref_count
) {
3596 for (i
= 0; i
< 16; ++i
)
3601 pic
= h
->long_ref
[i
];
3602 remove_long_at_index(h
, i
);
3604 pic
= h
->short_ref
[h
->short_ref_count
- 1];
3605 remove_short_at_index(h
, h
->short_ref_count
- 1);
3607 unreference_pic(h
, pic
, 0);
3610 print_short_term(h
);
3615 static int decode_ref_pic_marking(H264Context
*h
, GetBitContext
*gb
){
3616 MpegEncContext
* const s
= &h
->s
;
3619 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3620 s
->broken_link
= get_bits1(gb
) -1;
3621 h
->mmco
[0].long_arg
= get_bits1(gb
) - 1; // current_long_term_idx
3622 if(h
->mmco
[0].long_arg
== -1)
3625 h
->mmco
[0].opcode
= MMCO_LONG
;
3629 if(get_bits1(gb
)){ // adaptive_ref_pic_marking_mode_flag
3630 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3631 MMCOOpcode opcode
= get_ue_golomb(gb
);
3633 h
->mmco
[i
].opcode
= opcode
;
3634 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3635 h
->mmco
[i
].short_pic_num
= (h
->curr_pic_num
- get_ue_golomb(gb
) - 1) & (h
->max_pic_num
- 1);
3636 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3637 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3641 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
3642 unsigned int long_arg
= get_ue_golomb(gb
);
3643 if(long_arg
>= 32 || (long_arg
>= 16 && !(opcode
== MMCO_LONG2UNUSED
&& FIELD_PICTURE
))){
3644 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
3647 h
->mmco
[i
].long_arg
= long_arg
;
3650 if(opcode
> (unsigned)MMCO_LONG
){
3651 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
3654 if(opcode
== MMCO_END
)
3659 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
3661 if(h
->short_ref_count
&& h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
&&
3662 !(FIELD_PICTURE
&& !s
->first_field
&& s
->current_picture_ptr
->reference
)) {
3663 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
3664 h
->mmco
[0].short_pic_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
3666 if (FIELD_PICTURE
) {
3667 h
->mmco
[0].short_pic_num
*= 2;
3668 h
->mmco
[1].opcode
= MMCO_SHORT2UNUSED
;
3669 h
->mmco
[1].short_pic_num
= h
->mmco
[0].short_pic_num
+ 1;
3680 static int init_poc(H264Context
*h
){
3681 MpegEncContext
* const s
= &h
->s
;
3682 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
3685 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3686 h
->frame_num_offset
= 0;
3688 if(h
->frame_num
< h
->prev_frame_num
)
3689 h
->frame_num_offset
= h
->prev_frame_num_offset
+ max_frame_num
;
3691 h
->frame_num_offset
= h
->prev_frame_num_offset
;
3694 if(h
->sps
.poc_type
==0){
3695 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
3697 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3702 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
3703 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
3704 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
3705 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
3707 h
->poc_msb
= h
->prev_poc_msb
;
3708 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3710 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
3711 if(s
->picture_structure
== PICT_FRAME
)
3712 field_poc
[1] += h
->delta_poc_bottom
;
3713 }else if(h
->sps
.poc_type
==1){
3714 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
3717 if(h
->sps
.poc_cycle_length
!= 0)
3718 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
3722 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
3725 expected_delta_per_poc_cycle
= 0;
3726 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
3727 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
3729 if(abs_frame_num
> 0){
3730 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
3731 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
3733 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
3734 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
3735 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
3739 if(h
->nal_ref_idc
== 0)
3740 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
3742 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
3743 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
3745 if(s
->picture_structure
== PICT_FRAME
)
3746 field_poc
[1] += h
->delta_poc
[1];
3749 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3752 if(h
->nal_ref_idc
) poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
3753 else poc
= 2*(h
->frame_num_offset
+ h
->frame_num
) - 1;
3759 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
) {
3760 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
3761 s
->current_picture_ptr
->poc
= field_poc
[0];
3763 if(s
->picture_structure
!= PICT_TOP_FIELD
) {
3764 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
3765 s
->current_picture_ptr
->poc
= field_poc
[1];
3767 if(!FIELD_PICTURE
|| !s
->first_field
) {
3768 Picture
*cur
= s
->current_picture_ptr
;
3769 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
3777 * initialize scan tables
3779 static void init_scan_tables(H264Context
*h
){
3780 MpegEncContext
* const s
= &h
->s
;
3782 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
3783 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
3784 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
3786 for(i
=0; i
<16; i
++){
3787 #define T(x) (x>>2) | ((x<<2) & 0xF)
3788 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
3789 h
-> field_scan
[i
] = T( field_scan
[i
]);
3793 if(s
->dsp
.h264_idct8_add
== ff_h264_idct8_add_c
){
3794 memcpy(h
->zigzag_scan8x8
, zigzag_scan8x8
, 64*sizeof(uint8_t));
3795 memcpy(h
->zigzag_scan8x8_cavlc
, zigzag_scan8x8_cavlc
, 64*sizeof(uint8_t));
3796 memcpy(h
->field_scan8x8
, field_scan8x8
, 64*sizeof(uint8_t));
3797 memcpy(h
->field_scan8x8_cavlc
, field_scan8x8_cavlc
, 64*sizeof(uint8_t));
3799 for(i
=0; i
<64; i
++){
3800 #define T(x) (x>>3) | ((x&7)<<3)
3801 h
->zigzag_scan8x8
[i
] = T(zigzag_scan8x8
[i
]);
3802 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
3803 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
3804 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
3808 if(h
->sps
.transform_bypass
){ //FIXME same ugly
3809 h
->zigzag_scan_q0
= zigzag_scan
;
3810 h
->zigzag_scan8x8_q0
= zigzag_scan8x8
;
3811 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
3812 h
->field_scan_q0
= field_scan
;
3813 h
->field_scan8x8_q0
= field_scan8x8
;
3814 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
3816 h
->zigzag_scan_q0
= h
->zigzag_scan
;
3817 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
3818 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
3819 h
->field_scan_q0
= h
->field_scan
;
3820 h
->field_scan8x8_q0
= h
->field_scan8x8
;
3821 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
3826 * Replicates H264 "master" context to thread contexts.
3828 static void clone_slice(H264Context
*dst
, H264Context
*src
)
3830 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
3831 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
3832 dst
->s
.current_picture
= src
->s
.current_picture
;
3833 dst
->s
.linesize
= src
->s
.linesize
;
3834 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
3835 dst
->s
.first_field
= src
->s
.first_field
;
3837 dst
->prev_poc_msb
= src
->prev_poc_msb
;
3838 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
3839 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
3840 dst
->prev_frame_num
= src
->prev_frame_num
;
3841 dst
->short_ref_count
= src
->short_ref_count
;
3843 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
3844 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
3845 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
3846 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
3848 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
3849 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
3853 * decodes a slice header.
3854 * This will also call MPV_common_init() and frame_start() as needed.
3856 * @param h h264context
3857 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3859 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3861 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
3862 MpegEncContext
* const s
= &h
->s
;
3863 MpegEncContext
* const s0
= &h0
->s
;
3864 unsigned int first_mb_in_slice
;
3865 unsigned int pps_id
;
3866 int num_ref_idx_active_override_flag
;
3867 static const uint8_t slice_type_map
[5]= {FF_P_TYPE
, FF_B_TYPE
, FF_I_TYPE
, FF_SP_TYPE
, FF_SI_TYPE
};
3868 unsigned int slice_type
, tmp
, i
;
3869 int default_ref_list_done
= 0;
3870 int last_pic_structure
;
3872 s
->dropable
= h
->nal_ref_idc
== 0;
3874 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
){
3875 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
3876 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
3878 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
3879 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
3882 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
3884 if((s
->flags2
& CODEC_FLAG2_CHUNKS
) && first_mb_in_slice
== 0){
3885 h0
->current_slice
= 0;
3886 if (!s0
->first_field
)
3887 s
->current_picture_ptr
= NULL
;
3890 slice_type
= get_ue_golomb(&s
->gb
);
3892 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
3897 h
->slice_type_fixed
=1;
3899 h
->slice_type_fixed
=0;
3901 slice_type
= slice_type_map
[ slice_type
];
3902 if (slice_type
== FF_I_TYPE
3903 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
3904 default_ref_list_done
= 1;
3906 h
->slice_type
= slice_type
;
3908 s
->pict_type
= h
->slice_type
; // to make a few old func happy, it's wrong though
3909 if (s
->pict_type
== FF_B_TYPE
&& s0
->last_picture_ptr
== NULL
) {
3910 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3911 "B picture before any references, skipping\n");
3915 pps_id
= get_ue_golomb(&s
->gb
);
3916 if(pps_id
>=MAX_PPS_COUNT
){
3917 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
3920 if(!h0
->pps_buffers
[pps_id
]) {
3921 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing PPS referenced\n");
3924 h
->pps
= *h0
->pps_buffers
[pps_id
];
3926 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
3927 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing SPS referenced\n");
3930 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
3932 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
3933 h
->dequant_coeff_pps
= pps_id
;
3934 init_dequant_tables(h
);
3937 s
->mb_width
= h
->sps
.mb_width
;
3938 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
3940 h
->b_stride
= s
->mb_width
*4;
3941 h
->b8_stride
= s
->mb_width
*2;
3943 s
->width
= 16*s
->mb_width
- 2*FFMIN(h
->sps
.crop_right
, 7);
3944 if(h
->sps
.frame_mbs_only_flag
)
3945 s
->height
= 16*s
->mb_height
- 2*FFMIN(h
->sps
.crop_bottom
, 7);
3947 s
->height
= 16*s
->mb_height
- 4*FFMIN(h
->sps
.crop_bottom
, 3);
3949 if (s
->context_initialized
3950 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
3952 return -1; // width / height changed during parallelized decoding
3956 if (!s
->context_initialized
) {
3958 return -1; // we cant (re-)initialize context during parallel decoding
3959 if (MPV_common_init(s
) < 0)
3963 init_scan_tables(h
);
3966 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
3968 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
3969 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
3970 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
3973 init_scan_tables(c
);
3977 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
3978 if(context_init(h
->thread_context
[i
]) < 0)
3981 s
->avctx
->width
= s
->width
;
3982 s
->avctx
->height
= s
->height
;
3983 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
3984 if(!s
->avctx
->sample_aspect_ratio
.den
)
3985 s
->avctx
->sample_aspect_ratio
.den
= 1;
3987 if(h
->sps
.timing_info_present_flag
){
3988 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
* 2, h
->sps
.time_scale
};
3989 if(h
->x264_build
> 0 && h
->x264_build
< 44)
3990 s
->avctx
->time_base
.den
*= 2;
3991 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
3992 s
->avctx
->time_base
.num
, s
->avctx
->time_base
.den
, 1<<30);
3996 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
3999 h
->mb_aff_frame
= 0;
4000 last_pic_structure
= s0
->picture_structure
;
4001 if(h
->sps
.frame_mbs_only_flag
){
4002 s
->picture_structure
= PICT_FRAME
;
4004 if(get_bits1(&s
->gb
)) { //field_pic_flag
4005 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
4007 s
->picture_structure
= PICT_FRAME
;
4008 h
->mb_aff_frame
= h
->sps
.mb_aff
;
4012 if(h0
->current_slice
== 0){
4013 /* See if we have a decoded first field looking for a pair... */
4014 if (s0
->first_field
) {
4015 assert(s0
->current_picture_ptr
);
4016 assert(s0
->current_picture_ptr
->data
[0]);
4017 assert(s0
->current_picture_ptr
->reference
!= DELAYED_PIC_REF
);
4019 /* figure out if we have a complementary field pair */
4020 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
4022 * Previous field is unmatched. Don't display it, but let it
4023 * remain for reference if marked as such.
4025 s0
->current_picture_ptr
= NULL
;
4026 s0
->first_field
= FIELD_PICTURE
;
4029 if (h
->nal_ref_idc
&&
4030 s0
->current_picture_ptr
->reference
&&
4031 s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
4033 * This and previous field were reference, but had
4034 * different frame_nums. Consider this field first in
4035 * pair. Throw away previous field except for reference
4038 s0
->first_field
= 1;
4039 s0
->current_picture_ptr
= NULL
;
4042 /* Second field in complementary pair */
4043 s0
->first_field
= 0;
4048 /* Frame or first field in a potentially complementary pair */
4049 assert(!s0
->current_picture_ptr
);
4050 s0
->first_field
= FIELD_PICTURE
;
4053 if((!FIELD_PICTURE
|| s0
->first_field
) && frame_start(h
) < 0) {
4054 s0
->first_field
= 0;
4061 s
->current_picture_ptr
->frame_num
= h
->frame_num
; //FIXME frame_num cleanup
4063 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
4064 if(first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
4065 first_mb_in_slice
>= s
->mb_num
){
4066 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
4069 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
4070 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
4071 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
4072 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
4073 assert(s
->mb_y
< s
->mb_height
);
4075 if(s
->picture_structure
==PICT_FRAME
){
4076 h
->curr_pic_num
= h
->frame_num
;
4077 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
4079 h
->curr_pic_num
= 2*h
->frame_num
+ 1;
4080 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
4083 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4084 get_ue_golomb(&s
->gb
); /* idr_pic_id */
4087 if(h
->sps
.poc_type
==0){
4088 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
4090 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
4091 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
4095 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
4096 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
4098 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
4099 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
4104 if(h
->pps
.redundant_pic_cnt_present
){
4105 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
4108 //set defaults, might be overriden a few line later
4109 h
->ref_count
[0]= h
->pps
.ref_count
[0];
4110 h
->ref_count
[1]= h
->pps
.ref_count
[1];
4112 if(h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
|| h
->slice_type
== FF_B_TYPE
){
4113 if(h
->slice_type
== FF_B_TYPE
){
4114 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
4115 if(FIELD_PICTURE
&& h
->direct_spatial_mv_pred
)
4116 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "PAFF + spatial direct mode is not implemented\n");
4118 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
4120 if(num_ref_idx_active_override_flag
){
4121 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
4122 if(h
->slice_type
==FF_B_TYPE
)
4123 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
4125 if(h
->ref_count
[0]-1 > 32-1 || h
->ref_count
[1]-1 > 32-1){
4126 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
4127 h
->ref_count
[0]= h
->ref_count
[1]= 1;
4131 if(h
->slice_type
== FF_B_TYPE
)
4138 if(!default_ref_list_done
){
4139 fill_default_ref_list(h
);
4142 if(decode_ref_pic_list_reordering(h
) < 0)
4145 if( (h
->pps
.weighted_pred
&& (h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
))
4146 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type
==FF_B_TYPE
) )
4147 pred_weight_table(h
);
4148 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type
==FF_B_TYPE
)
4149 implicit_weight_table(h
);
4154 decode_ref_pic_marking(h0
, &s
->gb
);
4157 fill_mbaff_ref_list(h
);
4159 if( h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
&& h
->pps
.cabac
){
4160 tmp
= get_ue_golomb(&s
->gb
);
4162 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
4165 h
->cabac_init_idc
= tmp
;
4168 h
->last_qscale_diff
= 0;
4169 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
4171 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
4175 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
4176 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
4177 //FIXME qscale / qp ... stuff
4178 if(h
->slice_type
== FF_SP_TYPE
){
4179 get_bits1(&s
->gb
); /* sp_for_switch_flag */
4181 if(h
->slice_type
==FF_SP_TYPE
|| h
->slice_type
== FF_SI_TYPE
){
4182 get_se_golomb(&s
->gb
); /* slice_qs_delta */
4185 h
->deblocking_filter
= 1;
4186 h
->slice_alpha_c0_offset
= 0;
4187 h
->slice_beta_offset
= 0;
4188 if( h
->pps
.deblocking_filter_parameters_present
) {
4189 tmp
= get_ue_golomb(&s
->gb
);
4191 av_log(s
->avctx
, AV_LOG_ERROR
, "deblocking_filter_idc %u out of range\n", tmp
);
4194 h
->deblocking_filter
= tmp
;
4195 if(h
->deblocking_filter
< 2)
4196 h
->deblocking_filter
^= 1; // 1<->0
4198 if( h
->deblocking_filter
) {
4199 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
4200 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
4204 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
4205 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type
!= FF_I_TYPE
)
4206 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type
== FF_B_TYPE
)
4207 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
4208 h
->deblocking_filter
= 0;
4210 if(h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
4211 if(s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
4212 /* Cheat slightly for speed:
4213 Do not bother to deblock across slices. */
4214 h
->deblocking_filter
= 2;
4216 h0
->max_contexts
= 1;
4217 if(!h0
->single_decode_warning
) {
4218 av_log(s
->avctx
, AV_LOG_INFO
, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4219 h0
->single_decode_warning
= 1;
4222 return 1; // deblocking switched inside frame
4227 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
4228 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
4231 h0
->last_slice_type
= slice_type
;
4232 h
->slice_num
= ++h0
->current_slice
;
4234 h
->emu_edge_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16;
4235 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
4237 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4238 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4240 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4242 av_get_pict_type_char(h
->slice_type
),
4243 pps_id
, h
->frame_num
,
4244 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4245 h
->ref_count
[0], h
->ref_count
[1],
4247 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4249 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : "",
4250 h
->slice_type
== FF_B_TYPE
? (h
->direct_spatial_mv_pred
? "SPAT" : "TEMP") : ""
4260 static inline int get_level_prefix(GetBitContext
*gb
){
4264 OPEN_READER(re
, gb
);
4265 UPDATE_CACHE(re
, gb
);
4266 buf
=GET_CACHE(re
, gb
);
4268 log
= 32 - av_log2(buf
);
4270 print_bin(buf
>>(32-log
), log
);
4271 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4274 LAST_SKIP_BITS(re
, gb
, log
);
4275 CLOSE_READER(re
, gb
);
4280 static inline int get_dct8x8_allowed(H264Context
*h
){
4283 if(!IS_SUB_8X8(h
->sub_mb_type
[i
])
4284 || (!h
->sps
.direct_8x8_inference_flag
&& IS_DIRECT(h
->sub_mb_type
[i
])))
4291 * decodes a residual block.
4292 * @param n block index
4293 * @param scantable scantable
4294 * @param max_coeff number of coefficients in the block
4295 * @return <0 if an error occurred
4297 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4298 MpegEncContext
* const s
= &h
->s
;
4299 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4301 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4303 //FIXME put trailing_onex into the context
4305 if(n
== CHROMA_DC_BLOCK_INDEX
){
4306 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4307 total_coeff
= coeff_token
>>2;
4309 if(n
== LUMA_DC_BLOCK_INDEX
){
4310 total_coeff
= pred_non_zero_count(h
, 0);
4311 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4312 total_coeff
= coeff_token
>>2;
4314 total_coeff
= pred_non_zero_count(h
, n
);
4315 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4316 total_coeff
= coeff_token
>>2;
4317 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4321 //FIXME set last_non_zero?
4325 if(total_coeff
> (unsigned)max_coeff
) {
4326 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", s
->mb_x
, s
->mb_y
, total_coeff
);
4330 trailing_ones
= coeff_token
&3;
4331 tprintf(h
->s
.avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4332 assert(total_coeff
<=16);
4334 for(i
=0; i
<trailing_ones
; i
++){
4335 level
[i
]= 1 - 2*get_bits1(gb
);
4339 int level_code
, mask
;
4340 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4341 int prefix
= get_level_prefix(gb
);
4343 //first coefficient has suffix_length equal to 0 or 1
4344 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4346 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4348 level_code
= (prefix
<<suffix_length
); //part
4349 }else if(prefix
==14){
4351 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4353 level_code
= prefix
+ get_bits(gb
, 4); //part
4354 }else if(prefix
==15){
4355 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12); //part
4356 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4358 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4362 if(trailing_ones
< 3) level_code
+= 2;
4367 mask
= -(level_code
&1);
4368 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4371 //remaining coefficients have suffix_length > 0
4372 for(;i
<total_coeff
;i
++) {
4373 static const int suffix_limit
[7] = {0,5,11,23,47,95,INT_MAX
};
4374 prefix
= get_level_prefix(gb
);
4376 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
4377 }else if(prefix
==15){
4378 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12);
4380 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4383 mask
= -(level_code
&1);
4384 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4385 if(level_code
> suffix_limit
[suffix_length
])
4390 if(total_coeff
== max_coeff
)
4393 if(n
== CHROMA_DC_BLOCK_INDEX
)
4394 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4396 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4399 coeff_num
= zeros_left
+ total_coeff
- 1;
4400 j
= scantable
[coeff_num
];
4402 block
[j
] = level
[0];
4403 for(i
=1;i
<total_coeff
;i
++) {
4406 else if(zeros_left
< 7){
4407 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4409 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4411 zeros_left
-= run_before
;
4412 coeff_num
-= 1 + run_before
;
4413 j
= scantable
[ coeff_num
];
4418 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
4419 for(i
=1;i
<total_coeff
;i
++) {
4422 else if(zeros_left
< 7){
4423 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4425 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4427 zeros_left
-= run_before
;
4428 coeff_num
-= 1 + run_before
;
4429 j
= scantable
[ coeff_num
];
4431 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
4436 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4443 static void predict_field_decoding_flag(H264Context
*h
){
4444 MpegEncContext
* const s
= &h
->s
;
4445 const int mb_xy
= h
->mb_xy
;
4446 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
4447 ? s
->current_picture
.mb_type
[mb_xy
-1]
4448 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
4449 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
4451 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
4455 * decodes a P_SKIP or B_SKIP macroblock
4457 static void decode_mb_skip(H264Context
*h
){
4458 MpegEncContext
* const s
= &h
->s
;
4459 const int mb_xy
= h
->mb_xy
;
4462 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4463 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4466 mb_type
|= MB_TYPE_INTERLACED
;
4468 if( h
->slice_type
== FF_B_TYPE
)
4470 // just for fill_caches. pred_direct_motion will set the real mb_type
4471 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4473 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4474 pred_direct_motion(h
, &mb_type
);
4475 mb_type
|= MB_TYPE_SKIP
;
4480 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4482 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4483 pred_pskip_motion(h
, &mx
, &my
);
4484 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4485 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4488 write_back_motion(h
, mb_type
);
4489 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4490 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4491 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4492 h
->prev_mb_skipped
= 1;
4496 * decodes a macroblock
4497 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4499 static int decode_mb_cavlc(H264Context
*h
){
4500 MpegEncContext
* const s
= &h
->s
;
4502 int partition_count
;
4503 unsigned int mb_type
, cbp
;
4504 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4506 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4508 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
4510 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4511 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4513 if(h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
){
4514 if(s
->mb_skip_run
==-1)
4515 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4517 if (s
->mb_skip_run
--) {
4518 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
4519 if(s
->mb_skip_run
==0)
4520 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4522 predict_field_decoding_flag(h
);
4529 if( (s
->mb_y
&1) == 0 )
4530 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4532 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
4534 h
->prev_mb_skipped
= 0;
4536 mb_type
= get_ue_golomb(&s
->gb
);
4537 if(h
->slice_type
== FF_B_TYPE
){
4539 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4540 mb_type
= b_mb_type_info
[mb_type
].type
;
4543 goto decode_intra_mb
;
4545 }else if(h
->slice_type
== FF_P_TYPE
/*|| h->slice_type == FF_SP_TYPE */){
4547 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4548 mb_type
= p_mb_type_info
[mb_type
].type
;
4551 goto decode_intra_mb
;
4554 assert(h
->slice_type
== FF_I_TYPE
);
4557 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4561 cbp
= i_mb_type_info
[mb_type
].cbp
;
4562 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4563 mb_type
= i_mb_type_info
[mb_type
].type
;
4567 mb_type
|= MB_TYPE_INTERLACED
;
4569 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4571 if(IS_INTRA_PCM(mb_type
)){
4574 // We assume these blocks are very rare so we do not optimize it.
4575 align_get_bits(&s
->gb
);
4577 // The pixels are stored in the same order as levels in h->mb array.
4578 for(y
=0; y
<16; y
++){
4579 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
4580 for(x
=0; x
<16; x
++){
4581 tprintf(s
->avctx
, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4582 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= get_bits(&s
->gb
, 8);
4586 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
4588 tprintf(s
->avctx
, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4589 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4593 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
4595 tprintf(s
->avctx
, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4596 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4600 // In deblocking, the quantizer is 0
4601 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4602 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, 0);
4603 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, 0);
4604 // All coeffs are present
4605 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4607 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4612 h
->ref_count
[0] <<= 1;
4613 h
->ref_count
[1] <<= 1;
4616 fill_caches(h
, mb_type
, 0);
4619 if(IS_INTRA(mb_type
)){
4621 // init_top_left_availability(h);
4622 if(IS_INTRA4x4(mb_type
)){
4625 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4626 mb_type
|= MB_TYPE_8x8DCT
;
4630 // fill_intra4x4_pred_table(h);
4631 for(i
=0; i
<16; i
+=di
){
4632 int mode
= pred_intra_mode(h
, i
);
4634 if(!get_bits1(&s
->gb
)){
4635 const int rem_mode
= get_bits(&s
->gb
, 3);
4636 mode
= rem_mode
+ (rem_mode
>= mode
);
4640 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4642 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4644 write_back_intra_pred_mode(h
);
4645 if( check_intra4x4_pred_mode(h
) < 0)
4648 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4649 if(h
->intra16x16_pred_mode
< 0)
4653 pred_mode
= check_intra_pred_mode(h
, get_ue_golomb(&s
->gb
));
4656 h
->chroma_pred_mode
= pred_mode
;
4657 }else if(partition_count
==4){
4658 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4660 if(h
->slice_type
== FF_B_TYPE
){
4662 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4663 if(h
->sub_mb_type
[i
] >=13){
4664 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4667 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4668 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4670 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4671 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
4672 pred_direct_motion(h
, &mb_type
);
4673 h
->ref_cache
[0][scan8
[4]] =
4674 h
->ref_cache
[1][scan8
[4]] =
4675 h
->ref_cache
[0][scan8
[12]] =
4676 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
4679 assert(h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
); //FIXME SP correct ?
4681 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4682 if(h
->sub_mb_type
[i
] >=4){
4683 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4686 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4687 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4691 for(list
=0; list
<h
->list_count
; list
++){
4692 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4694 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4695 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4696 unsigned int tmp
= get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
4698 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
4710 dct8x8_allowed
= get_dct8x8_allowed(h
);
4712 for(list
=0; list
<h
->list_count
; list
++){
4714 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
4715 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
4718 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4719 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4721 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4722 const int sub_mb_type
= h
->sub_mb_type
[i
];
4723 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4724 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4726 const int index
= 4*i
+ block_width
*j
;
4727 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4728 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4729 mx
+= get_se_golomb(&s
->gb
);
4730 my
+= get_se_golomb(&s
->gb
);
4731 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4733 if(IS_SUB_8X8(sub_mb_type
)){
4735 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4737 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4738 }else if(IS_SUB_8X4(sub_mb_type
)){
4739 mv_cache
[ 1 ][0]= mx
;
4740 mv_cache
[ 1 ][1]= my
;
4741 }else if(IS_SUB_4X8(sub_mb_type
)){
4742 mv_cache
[ 8 ][0]= mx
;
4743 mv_cache
[ 8 ][1]= my
;
4745 mv_cache
[ 0 ][0]= mx
;
4746 mv_cache
[ 0 ][1]= my
;
4749 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4755 }else if(IS_DIRECT(mb_type
)){
4756 pred_direct_motion(h
, &mb_type
);
4757 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4759 int list
, mx
, my
, i
;
4760 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4761 if(IS_16X16(mb_type
)){
4762 for(list
=0; list
<h
->list_count
; list
++){
4764 if(IS_DIR(mb_type
, 0, list
)){
4765 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4766 if(val
>= h
->ref_count
[list
]){
4767 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4771 val
= LIST_NOT_USED
&0xFF;
4772 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4774 for(list
=0; list
<h
->list_count
; list
++){
4776 if(IS_DIR(mb_type
, 0, list
)){
4777 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4778 mx
+= get_se_golomb(&s
->gb
);
4779 my
+= get_se_golomb(&s
->gb
);
4780 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4782 val
= pack16to32(mx
,my
);
4785 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 4);
4788 else if(IS_16X8(mb_type
)){
4789 for(list
=0; list
<h
->list_count
; list
++){
4792 if(IS_DIR(mb_type
, i
, list
)){
4793 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4794 if(val
>= h
->ref_count
[list
]){
4795 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4799 val
= LIST_NOT_USED
&0xFF;
4800 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4803 for(list
=0; list
<h
->list_count
; list
++){
4806 if(IS_DIR(mb_type
, i
, list
)){
4807 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4808 mx
+= get_se_golomb(&s
->gb
);
4809 my
+= get_se_golomb(&s
->gb
);
4810 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4812 val
= pack16to32(mx
,my
);
4815 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
4819 assert(IS_8X16(mb_type
));
4820 for(list
=0; list
<h
->list_count
; list
++){
4823 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4824 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4825 if(val
>= h
->ref_count
[list
]){
4826 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4830 val
= LIST_NOT_USED
&0xFF;
4831 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4834 for(list
=0; list
<h
->list_count
; list
++){
4837 if(IS_DIR(mb_type
, i
, list
)){
4838 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4839 mx
+= get_se_golomb(&s
->gb
);
4840 my
+= get_se_golomb(&s
->gb
);
4841 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4843 val
= pack16to32(mx
,my
);
4846 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
4852 if(IS_INTER(mb_type
))
4853 write_back_motion(h
, mb_type
);
4855 if(!IS_INTRA16x16(mb_type
)){
4856 cbp
= get_ue_golomb(&s
->gb
);
4858 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4862 if(IS_INTRA4x4(mb_type
))
4863 cbp
= golomb_to_intra4x4_cbp
[cbp
];
4865 cbp
= golomb_to_inter_cbp
[cbp
];
4869 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4870 if(get_bits1(&s
->gb
))
4871 mb_type
|= MB_TYPE_8x8DCT
;
4873 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4875 if(cbp
|| IS_INTRA16x16(mb_type
)){
4876 int i8x8
, i4x4
, chroma_idx
;
4878 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4879 const uint8_t *scan
, *scan8x8
, *dc_scan
;
4881 // fill_non_zero_count_cache(h);
4883 if(IS_INTERLACED(mb_type
)){
4884 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
4885 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4886 dc_scan
= luma_dc_field_scan
;
4888 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
4889 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4890 dc_scan
= luma_dc_zigzag_scan
;
4893 dquant
= get_se_golomb(&s
->gb
);
4895 if( dquant
> 25 || dquant
< -26 ){
4896 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4900 s
->qscale
+= dquant
;
4901 if(((unsigned)s
->qscale
) > 51){
4902 if(s
->qscale
<0) s
->qscale
+= 52;
4903 else s
->qscale
-= 52;
4906 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, s
->qscale
);
4907 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, s
->qscale
);
4908 if(IS_INTRA16x16(mb_type
)){
4909 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
4910 return -1; //FIXME continue if partitioned and other return -1 too
4913 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4916 for(i8x8
=0; i8x8
<4; i8x8
++){
4917 for(i4x4
=0; i4x4
<4; i4x4
++){
4918 const int index
= i4x4
+ 4*i8x8
;
4919 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
4925 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
4928 for(i8x8
=0; i8x8
<4; i8x8
++){
4929 if(cbp
& (1<<i8x8
)){
4930 if(IS_8x8DCT(mb_type
)){
4931 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
4933 for(i4x4
=0; i4x4
<4; i4x4
++){
4934 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
4935 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
4938 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4939 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
4941 for(i4x4
=0; i4x4
<4; i4x4
++){
4942 const int index
= i4x4
+ 4*i8x8
;
4944 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
4950 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4951 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
4957 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
4958 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
4964 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
4965 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
4966 for(i4x4
=0; i4x4
<4; i4x4
++){
4967 const int index
= 16 + 4*chroma_idx
+ i4x4
;
4968 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, qmul
, 15) < 0){
4974 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4975 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4976 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4979 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4980 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
4981 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4982 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4984 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4985 write_back_non_zero_count(h
);
4988 h
->ref_count
[0] >>= 1;
4989 h
->ref_count
[1] >>= 1;
4995 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
4996 MpegEncContext
* const s
= &h
->s
;
4997 const int mb_x
= s
->mb_x
;
4998 const int mb_y
= s
->mb_y
& ~1;
4999 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
5000 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
5002 unsigned int ctx
= 0;
5004 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
5007 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
5011 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
5014 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
5015 uint8_t *state
= &h
->cabac_state
[ctx_base
];
5019 MpegEncContext
* const s
= &h
->s
;
5020 const int mba_xy
= h
->left_mb_xy
[0];
5021 const int mbb_xy
= h
->top_mb_xy
;
5023 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
5025 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
5027 if( get_cabac_noinline( &h
->cabac
, &state
[ctx
] ) == 0 )
5028 return 0; /* I4x4 */
5031 if( get_cabac_noinline( &h
->cabac
, &state
[0] ) == 0 )
5032 return 0; /* I4x4 */
5035 if( get_cabac_terminate( &h
->cabac
) )
5036 return 25; /* PCM */
5038 mb_type
= 1; /* I16x16 */
5039 mb_type
+= 12 * get_cabac_noinline( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
5040 if( get_cabac_noinline( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
5041 mb_type
+= 4 + 4 * get_cabac_noinline( &h
->cabac
, &state
[2+intra_slice
] );
5042 mb_type
+= 2 * get_cabac_noinline( &h
->cabac
, &state
[3+intra_slice
] );
5043 mb_type
+= 1 * get_cabac_noinline( &h
->cabac
, &state
[3+2*intra_slice
] );
5047 static int decode_cabac_mb_type( H264Context
*h
) {
5048 MpegEncContext
* const s
= &h
->s
;
5050 if( h
->slice_type
== FF_I_TYPE
) {
5051 return decode_cabac_intra_mb_type(h
, 3, 1);
5052 } else if( h
->slice_type
== FF_P_TYPE
) {
5053 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
5055 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
5056 /* P_L0_D16x16, P_8x8 */
5057 return 3 * get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[16] );
5059 /* P_L0_D8x16, P_L0_D16x8 */
5060 return 2 - get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[17] );
5063 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
5065 } else if( h
->slice_type
== FF_B_TYPE
) {
5066 const int mba_xy
= h
->left_mb_xy
[0];
5067 const int mbb_xy
= h
->top_mb_xy
;
5071 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
5073 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
5076 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
5077 return 0; /* B_Direct_16x16 */
5079 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
5080 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
5083 bits
= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
5084 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
5085 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
5086 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
5088 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5089 else if( bits
== 13 ) {
5090 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
5091 } else if( bits
== 14 )
5092 return 11; /* B_L1_L0_8x16 */
5093 else if( bits
== 15 )
5094 return 22; /* B_8x8 */
5096 bits
= ( bits
<<1 ) | get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
5097 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5099 /* TODO SI/SP frames? */
5104 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
5105 MpegEncContext
* const s
= &h
->s
;
5109 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
5110 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
5113 && h
->slice_table
[mba_xy
] == h
->slice_num
5114 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
5115 mba_xy
+= s
->mb_stride
;
5117 mbb_xy
= mb_xy
- s
->mb_stride
;
5119 && h
->slice_table
[mbb_xy
] == h
->slice_num
5120 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
5121 mbb_xy
-= s
->mb_stride
;
5123 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
5125 int mb_xy
= h
->mb_xy
;
5127 mbb_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
5130 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
5132 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
5135 if( h
->slice_type
== FF_B_TYPE
)
5137 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
5140 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
5143 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
5146 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5147 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5148 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5150 if( mode
>= pred_mode
)
5156 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
5157 const int mba_xy
= h
->left_mb_xy
[0];
5158 const int mbb_xy
= h
->top_mb_xy
;
5162 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5163 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5166 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
5169 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
5172 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5174 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5180 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5181 int cbp_b
, cbp_a
, ctx
, cbp
= 0;
5183 cbp_a
= h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
? h
->left_cbp
: -1;
5184 cbp_b
= h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
? h
->top_cbp
: -1;
5186 ctx
= !(cbp_a
& 0x02) + 2 * !(cbp_b
& 0x04);
5187 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]);
5188 ctx
= !(cbp
& 0x01) + 2 * !(cbp_b
& 0x08);
5189 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 1;
5190 ctx
= !(cbp_a
& 0x08) + 2 * !(cbp
& 0x01);
5191 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 2;
5192 ctx
= !(cbp
& 0x04) + 2 * !(cbp
& 0x02);
5193 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 3;
5196 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
5200 cbp_a
= (h
->left_cbp
>>4)&0x03;
5201 cbp_b
= (h
-> top_cbp
>>4)&0x03;
5204 if( cbp_a
> 0 ) ctx
++;
5205 if( cbp_b
> 0 ) ctx
+= 2;
5206 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
5210 if( cbp_a
== 2 ) ctx
++;
5211 if( cbp_b
== 2 ) ctx
+= 2;
5212 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
5214 static int decode_cabac_mb_dqp( H264Context
*h
) {
5218 if( h
->last_qscale_diff
!= 0 )
5221 while( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5227 if(val
> 102) //prevent infinite loop
5234 return -(val
+ 1)/2;
5236 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5237 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5239 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5241 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
5245 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5247 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5248 return 0; /* B_Direct_8x8 */
5249 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5250 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5252 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5253 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5254 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5257 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5258 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5262 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5263 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
5266 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5267 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5268 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5272 if( h
->slice_type
== FF_B_TYPE
) {
5273 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5275 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5284 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
5290 if(ref
>= 32 /*h->ref_list[list]*/){
5291 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_ref\n");
5292 return 0; //FIXME we should return -1 and check the return everywhere
5298 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5299 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5300 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5301 int ctxbase
= (l
== 0) ? 40 : 47;
5306 else if( amvd
> 32 )
5311 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
5316 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
5324 while( get_cabac_bypass( &h
->cabac
) ) {
5328 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_mvd\n");
5333 if( get_cabac_bypass( &h
->cabac
) )
5337 return get_cabac_bypass_sign( &h
->cabac
, -mvd
);
5340 static inline int get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
) {
5345 nza
= h
->left_cbp
&0x100;
5346 nzb
= h
-> top_cbp
&0x100;
5347 } else if( cat
== 1 || cat
== 2 ) {
5348 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5349 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5350 } else if( cat
== 3 ) {
5351 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5352 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
5355 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
5356 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
5365 return ctx
+ 4 * cat
;
5368 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8
[63]) = {
5369 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5370 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5371 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5372 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5375 static void decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5376 static const int significant_coeff_flag_offset
[2][6] = {
5377 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5378 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5380 static const int last_coeff_flag_offset
[2][6] = {
5381 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5382 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5384 static const int coeff_abs_level_m1_offset
[6] = {
5385 227+0, 227+10, 227+20, 227+30, 227+39, 426
5387 static const uint8_t significant_coeff_flag_offset_8x8
[2][63] = {
5388 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5389 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5390 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5391 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5392 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5393 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5394 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5395 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5397 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5398 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5399 * map node ctx => cabac ctx for level=1 */
5400 static const uint8_t coeff_abs_level1_ctx
[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5401 /* map node ctx => cabac ctx for level>1 */
5402 static const uint8_t coeff_abs_levelgt1_ctx
[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5403 static const uint8_t coeff_abs_level_transition
[2][8] = {
5404 /* update node ctx after decoding a level=1 */
5405 { 1, 2, 3, 3, 4, 5, 6, 7 },
5406 /* update node ctx after decoding a level>1 */
5407 { 4, 4, 4, 4, 5, 6, 7, 7 }
5413 int coeff_count
= 0;
5416 uint8_t *significant_coeff_ctx_base
;
5417 uint8_t *last_coeff_ctx_base
;
5418 uint8_t *abs_level_m1_ctx_base
;
5421 #define CABAC_ON_STACK
5423 #ifdef CABAC_ON_STACK
5426 cc
.range
= h
->cabac
.range
;
5427 cc
.low
= h
->cabac
.low
;
5428 cc
.bytestream
= h
->cabac
.bytestream
;
5430 #define CC &h->cabac
5434 /* cat: 0-> DC 16x16 n = 0
5435 * 1-> AC 16x16 n = luma4x4idx
5436 * 2-> Luma4x4 n = luma4x4idx
5437 * 3-> DC Chroma n = iCbCr
5438 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5439 * 5-> Luma8x8 n = 4 * luma8x8idx
5442 /* read coded block flag */
5444 if( get_cabac( CC
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
) ] ) == 0 ) {
5445 if( cat
== 1 || cat
== 2 )
5446 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5448 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
5449 #ifdef CABAC_ON_STACK
5450 h
->cabac
.range
= cc
.range
;
5451 h
->cabac
.low
= cc
.low
;
5452 h
->cabac
.bytestream
= cc
.bytestream
;
5458 significant_coeff_ctx_base
= h
->cabac_state
5459 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
5460 last_coeff_ctx_base
= h
->cabac_state
5461 + last_coeff_flag_offset
[MB_FIELD
][cat
];
5462 abs_level_m1_ctx_base
= h
->cabac_state
5463 + coeff_abs_level_m1_offset
[cat
];
5466 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5467 for(last= 0; last < coefs; last++) { \
5468 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5469 if( get_cabac( CC, sig_ctx )) { \
5470 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5471 index[coeff_count++] = last; \
5472 if( get_cabac( CC, last_ctx ) ) { \
5478 if( last == max_coeff -1 ) {\
5479 index[coeff_count++] = last;\
5481 const uint8_t *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
5482 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5483 coeff_count
= decode_significance_8x8_x86(CC
, significant_coeff_ctx_base
, index
, sig_off
);
5485 coeff_count
= decode_significance_x86(CC
, max_coeff
, significant_coeff_ctx_base
, index
);
5487 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
5489 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
5492 assert(coeff_count
> 0);
5495 h
->cbp_table
[h
->mb_xy
] |= 0x100;
5496 else if( cat
== 1 || cat
== 2 )
5497 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5499 h
->cbp_table
[h
->mb_xy
] |= 0x40 << n
;
5501 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
5504 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
5507 for( coeff_count
--; coeff_count
>= 0; coeff_count
-- ) {
5508 uint8_t *ctx
= coeff_abs_level1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5510 int j
= scantable
[index
[coeff_count
]];
5512 if( get_cabac( CC
, ctx
) == 0 ) {
5513 node_ctx
= coeff_abs_level_transition
[0][node_ctx
];
5515 block
[j
] = get_cabac_bypass_sign( CC
, -1);
5517 block
[j
] = (get_cabac_bypass_sign( CC
, -qmul
[j
]) + 32) >> 6;
5521 ctx
= coeff_abs_levelgt1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5522 node_ctx
= coeff_abs_level_transition
[1][node_ctx
];
5524 while( coeff_abs
< 15 && get_cabac( CC
, ctx
) ) {
5528 if( coeff_abs
>= 15 ) {
5530 while( get_cabac_bypass( CC
) ) {
5536 coeff_abs
+= coeff_abs
+ get_cabac_bypass( CC
);
5542 if( get_cabac_bypass( CC
) ) block
[j
] = -coeff_abs
;
5543 else block
[j
] = coeff_abs
;
5545 if( get_cabac_bypass( CC
) ) block
[j
] = (-coeff_abs
* qmul
[j
] + 32) >> 6;
5546 else block
[j
] = ( coeff_abs
* qmul
[j
] + 32) >> 6;
5550 #ifdef CABAC_ON_STACK
5551 h
->cabac
.range
= cc
.range
;
5552 h
->cabac
.low
= cc
.low
;
5553 h
->cabac
.bytestream
= cc
.bytestream
;
5558 static inline void compute_mb_neighbors(H264Context
*h
)
5560 MpegEncContext
* const s
= &h
->s
;
5561 const int mb_xy
= h
->mb_xy
;
5562 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5563 h
->left_mb_xy
[0] = mb_xy
- 1;
5565 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5566 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
5567 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5568 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5569 const int curr_mb_frame_flag
= !MB_FIELD
;
5570 const int bottom
= (s
->mb_y
& 1);
5572 ? !curr_mb_frame_flag
// bottom macroblock
5573 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
5575 h
->top_mb_xy
-= s
->mb_stride
;
5577 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
5578 h
->left_mb_xy
[0] = pair_xy
- 1;
5580 } else if (FIELD_PICTURE
) {
5581 h
->top_mb_xy
-= s
->mb_stride
;
5587 * decodes a macroblock
5588 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5590 static int decode_mb_cabac(H264Context
*h
) {
5591 MpegEncContext
* const s
= &h
->s
;
5593 int mb_type
, partition_count
, cbp
= 0;
5594 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5596 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5598 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?)
5600 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5601 if( h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
) {
5603 /* a skipped mb needs the aff flag from the following mb */
5604 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
5605 predict_field_decoding_flag(h
);
5606 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
5607 skip
= h
->next_mb_skipped
;
5609 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
5610 /* read skip flags */
5612 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
5613 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
5614 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
5615 if(h
->next_mb_skipped
)
5616 predict_field_decoding_flag(h
);
5618 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5623 h
->cbp_table
[mb_xy
] = 0;
5624 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5625 h
->last_qscale_diff
= 0;
5632 if( (s
->mb_y
&1) == 0 )
5634 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5636 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
5638 h
->prev_mb_skipped
= 0;
5640 compute_mb_neighbors(h
);
5641 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
5642 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
5646 if( h
->slice_type
== FF_B_TYPE
) {
5648 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5649 mb_type
= b_mb_type_info
[mb_type
].type
;
5652 goto decode_intra_mb
;
5654 } else if( h
->slice_type
== FF_P_TYPE
) {
5656 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5657 mb_type
= p_mb_type_info
[mb_type
].type
;
5660 goto decode_intra_mb
;
5663 assert(h
->slice_type
== FF_I_TYPE
);
5665 partition_count
= 0;
5666 cbp
= i_mb_type_info
[mb_type
].cbp
;
5667 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5668 mb_type
= i_mb_type_info
[mb_type
].type
;
5671 mb_type
|= MB_TYPE_INTERLACED
;
5673 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5675 if(IS_INTRA_PCM(mb_type
)) {
5679 // We assume these blocks are very rare so we do not optimize it.
5680 // FIXME The two following lines get the bitstream position in the cabac
5681 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5682 ptr
= h
->cabac
.bytestream
;
5683 if(h
->cabac
.low
&0x1) ptr
--;
5685 if(h
->cabac
.low
&0x1FF) ptr
--;
5688 // The pixels are stored in the same order as levels in h->mb array.
5689 for(y
=0; y
<16; y
++){
5690 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
5691 for(x
=0; x
<16; x
++){
5692 tprintf(s
->avctx
, "LUMA ICPM LEVEL (%3d)\n", *ptr
);
5693 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= *ptr
++;
5697 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
5699 tprintf(s
->avctx
, "CHROMA U ICPM LEVEL (%3d)\n", *ptr
);
5700 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
5704 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
5706 tprintf(s
->avctx
, "CHROMA V ICPM LEVEL (%3d)\n", *ptr
);
5707 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
5711 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5713 // All blocks are present
5714 h
->cbp_table
[mb_xy
] = 0x1ef;
5715 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5716 // In deblocking, the quantizer is 0
5717 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5718 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, 0);
5719 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, 0);
5720 // All coeffs are present
5721 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5722 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5727 h
->ref_count
[0] <<= 1;
5728 h
->ref_count
[1] <<= 1;
5731 fill_caches(h
, mb_type
, 0);
5733 if( IS_INTRA( mb_type
) ) {
5735 if( IS_INTRA4x4( mb_type
) ) {
5736 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
5737 mb_type
|= MB_TYPE_8x8DCT
;
5738 for( i
= 0; i
< 16; i
+=4 ) {
5739 int pred
= pred_intra_mode( h
, i
);
5740 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5741 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5744 for( i
= 0; i
< 16; i
++ ) {
5745 int pred
= pred_intra_mode( h
, i
);
5746 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5748 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5751 write_back_intra_pred_mode(h
);
5752 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5754 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5755 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5757 h
->chroma_pred_mode_table
[mb_xy
] =
5758 pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5760 pred_mode
= check_intra_pred_mode( h
, pred_mode
);
5761 if( pred_mode
< 0 ) return -1;
5762 h
->chroma_pred_mode
= pred_mode
;
5763 } else if( partition_count
== 4 ) {
5764 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5766 if( h
->slice_type
== FF_B_TYPE
) {
5767 for( i
= 0; i
< 4; i
++ ) {
5768 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5769 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5770 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5772 if( IS_DIRECT(h
->sub_mb_type
[0] | h
->sub_mb_type
[1] |
5773 h
->sub_mb_type
[2] | h
->sub_mb_type
[3]) ) {
5774 pred_direct_motion(h
, &mb_type
);
5775 h
->ref_cache
[0][scan8
[4]] =
5776 h
->ref_cache
[1][scan8
[4]] =
5777 h
->ref_cache
[0][scan8
[12]] =
5778 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5779 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5780 for( i
= 0; i
< 4; i
++ )
5781 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5782 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5786 for( i
= 0; i
< 4; i
++ ) {
5787 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5788 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5789 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5793 for( list
= 0; list
< h
->list_count
; list
++ ) {
5794 for( i
= 0; i
< 4; i
++ ) {
5795 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5796 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5797 if( h
->ref_count
[list
] > 1 )
5798 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5804 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5805 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5810 dct8x8_allowed
= get_dct8x8_allowed(h
);
5812 for(list
=0; list
<h
->list_count
; list
++){
5814 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5815 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5816 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5820 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5821 const int sub_mb_type
= h
->sub_mb_type
[i
];
5822 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5823 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5826 const int index
= 4*i
+ block_width
*j
;
5827 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5828 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5829 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
5831 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5832 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5833 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5835 if(IS_SUB_8X8(sub_mb_type
)){
5837 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5839 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5842 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5844 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5845 }else if(IS_SUB_8X4(sub_mb_type
)){
5846 mv_cache
[ 1 ][0]= mx
;
5847 mv_cache
[ 1 ][1]= my
;
5849 mvd_cache
[ 1 ][0]= mx
- mpx
;
5850 mvd_cache
[ 1 ][1]= my
- mpy
;
5851 }else if(IS_SUB_4X8(sub_mb_type
)){
5852 mv_cache
[ 8 ][0]= mx
;
5853 mv_cache
[ 8 ][1]= my
;
5855 mvd_cache
[ 8 ][0]= mx
- mpx
;
5856 mvd_cache
[ 8 ][1]= my
- mpy
;
5858 mv_cache
[ 0 ][0]= mx
;
5859 mv_cache
[ 0 ][1]= my
;
5861 mvd_cache
[ 0 ][0]= mx
- mpx
;
5862 mvd_cache
[ 0 ][1]= my
- mpy
;
5865 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5866 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5867 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5868 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
5872 } else if( IS_DIRECT(mb_type
) ) {
5873 pred_direct_motion(h
, &mb_type
);
5874 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5875 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5876 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5878 int list
, mx
, my
, i
, mpx
, mpy
;
5879 if(IS_16X16(mb_type
)){
5880 for(list
=0; list
<h
->list_count
; list
++){
5881 if(IS_DIR(mb_type
, 0, list
)){
5882 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
5883 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5885 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1); //FIXME factorize and the other fill_rect below too
5887 for(list
=0; list
<h
->list_count
; list
++){
5888 if(IS_DIR(mb_type
, 0, list
)){
5889 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5891 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5892 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5893 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5895 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5896 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5898 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5901 else if(IS_16X8(mb_type
)){
5902 for(list
=0; list
<h
->list_count
; list
++){
5904 if(IS_DIR(mb_type
, i
, list
)){
5905 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
5906 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5908 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5911 for(list
=0; list
<h
->list_count
; list
++){
5913 if(IS_DIR(mb_type
, i
, list
)){
5914 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5915 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5916 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5917 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5919 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5920 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5922 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5923 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5928 assert(IS_8X16(mb_type
));
5929 for(list
=0; list
<h
->list_count
; list
++){
5931 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5932 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
5933 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5935 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5938 for(list
=0; list
<h
->list_count
; list
++){
5940 if(IS_DIR(mb_type
, i
, list
)){
5941 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5942 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5943 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5945 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5946 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5947 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5949 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5950 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5957 if( IS_INTER( mb_type
) ) {
5958 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5959 write_back_motion( h
, mb_type
);
5962 if( !IS_INTRA16x16( mb_type
) ) {
5963 cbp
= decode_cabac_mb_cbp_luma( h
);
5964 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
5967 h
->cbp_table
[mb_xy
] = h
->cbp
= cbp
;
5969 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
5970 if( decode_cabac_mb_transform_size( h
) )
5971 mb_type
|= MB_TYPE_8x8DCT
;
5973 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5975 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
5976 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5977 const uint32_t *qmul
;
5980 if(IS_INTERLACED(mb_type
)){
5981 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
5982 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5983 dc_scan
= luma_dc_field_scan
;
5985 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
5986 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5987 dc_scan
= luma_dc_zigzag_scan
;
5990 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
5991 if( dqp
== INT_MIN
){
5992 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
5996 if(((unsigned)s
->qscale
) > 51){
5997 if(s
->qscale
<0) s
->qscale
+= 52;
5998 else s
->qscale
-= 52;
6000 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
6001 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
6003 if( IS_INTRA16x16( mb_type
) ) {
6005 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6006 decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16);
6009 qmul
= h
->dequant4_coeff
[0][s
->qscale
];
6010 for( i
= 0; i
< 16; i
++ ) {
6011 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6012 decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, qmul
, 15);
6015 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
6019 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
6020 if( cbp
& (1<<i8x8
) ) {
6021 if( IS_8x8DCT(mb_type
) ) {
6022 decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
6023 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64);
6025 qmul
= h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
];
6026 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
6027 const int index
= 4*i8x8
+ i4x4
;
6028 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6030 decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, qmul
, 16);
6031 //STOP_TIMER("decode_residual")
6035 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
6036 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
6043 for( c
= 0; c
< 2; c
++ ) {
6044 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6045 decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4);
6051 for( c
= 0; c
< 2; c
++ ) {
6052 qmul
= h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[c
]];
6053 for( i
= 0; i
< 4; i
++ ) {
6054 const int index
= 16 + 4 * c
+ i
;
6055 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6056 decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, qmul
, 15);
6060 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6061 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6062 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6065 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6066 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
6067 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6068 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6069 h
->last_qscale_diff
= 0;
6072 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
6073 write_back_non_zero_count(h
);
6076 h
->ref_count
[0] >>= 1;
6077 h
->ref_count
[1] >>= 1;
6084 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6086 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6087 const int alpha
= (alpha_table
+52)[index_a
];
6088 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6093 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6094 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6096 /* 16px edge length, because bS=4 is triggered by being at
6097 * the edge of an intra MB, so all 4 bS are the same */
6098 for( d
= 0; d
< 16; d
++ ) {
6099 const int p0
= pix
[-1];
6100 const int p1
= pix
[-2];
6101 const int p2
= pix
[-3];
6103 const int q0
= pix
[0];
6104 const int q1
= pix
[1];
6105 const int q2
= pix
[2];
6107 if( FFABS( p0
- q0
) < alpha
&&
6108 FFABS( p1
- p0
) < beta
&&
6109 FFABS( q1
- q0
) < beta
) {
6111 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6112 if( FFABS( p2
- p0
) < beta
)
6114 const int p3
= pix
[-4];
6116 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6117 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6118 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6121 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6123 if( FFABS( q2
- q0
) < beta
)
6125 const int q3
= pix
[3];
6127 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6128 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6129 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6132 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6136 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6137 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6139 tprintf(h
->s
.avctx
, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
6145 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6147 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6148 const int alpha
= (alpha_table
+52)[index_a
];
6149 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6154 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6155 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6157 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6161 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6163 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6169 int bS_index
= (i
>> 1);
6172 bS_index
|= (i
& 1);
6175 if( bS
[bS_index
] == 0 ) {
6179 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
6180 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6181 alpha
= (alpha_table
+52)[index_a
];
6182 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6184 if( bS
[bS_index
] < 4 ) {
6185 const int tc0
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1];
6186 const int p0
= pix
[-1];
6187 const int p1
= pix
[-2];
6188 const int p2
= pix
[-3];
6189 const int q0
= pix
[0];
6190 const int q1
= pix
[1];
6191 const int q2
= pix
[2];
6193 if( FFABS( p0
- q0
) < alpha
&&
6194 FFABS( p1
- p0
) < beta
&&
6195 FFABS( q1
- q0
) < beta
) {
6199 if( FFABS( p2
- p0
) < beta
) {
6200 pix
[-2] = p1
+ av_clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6203 if( FFABS( q2
- q0
) < beta
) {
6204 pix
[1] = q1
+ av_clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6208 i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6209 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6210 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6211 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6214 const int p0
= pix
[-1];
6215 const int p1
= pix
[-2];
6216 const int p2
= pix
[-3];
6218 const int q0
= pix
[0];
6219 const int q1
= pix
[1];
6220 const int q2
= pix
[2];
6222 if( FFABS( p0
- q0
) < alpha
&&
6223 FFABS( p1
- p0
) < beta
&&
6224 FFABS( q1
- q0
) < beta
) {
6226 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6227 if( FFABS( p2
- p0
) < beta
)
6229 const int p3
= pix
[-4];
6231 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6232 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6233 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6236 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6238 if( FFABS( q2
- q0
) < beta
)
6240 const int q3
= pix
[3];
6242 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6243 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6244 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6247 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6251 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6252 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6254 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6259 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6261 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6269 if( bS
[bS_index
] == 0 ) {
6273 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6274 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6275 alpha
= (alpha_table
+52)[index_a
];
6276 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6278 if( bS
[bS_index
] < 4 ) {
6279 const int tc
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1] + 1;
6280 const int p0
= pix
[-1];
6281 const int p1
= pix
[-2];
6282 const int q0
= pix
[0];
6283 const int q1
= pix
[1];
6285 if( FFABS( p0
- q0
) < alpha
&&
6286 FFABS( p1
- p0
) < beta
&&
6287 FFABS( q1
- q0
) < beta
) {
6288 const int i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6290 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6291 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6292 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6295 const int p0
= pix
[-1];
6296 const int p1
= pix
[-2];
6297 const int q0
= pix
[0];
6298 const int q1
= pix
[1];
6300 if( FFABS( p0
- q0
) < alpha
&&
6301 FFABS( p1
- p0
) < beta
&&
6302 FFABS( q1
- q0
) < beta
) {
6304 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6305 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6306 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6312 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6314 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6315 const int alpha
= (alpha_table
+52)[index_a
];
6316 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6317 const int pix_next
= stride
;
6322 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6323 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6325 /* 16px edge length, see filter_mb_edgev */
6326 for( d
= 0; d
< 16; d
++ ) {
6327 const int p0
= pix
[-1*pix_next
];
6328 const int p1
= pix
[-2*pix_next
];
6329 const int p2
= pix
[-3*pix_next
];
6330 const int q0
= pix
[0];
6331 const int q1
= pix
[1*pix_next
];
6332 const int q2
= pix
[2*pix_next
];
6334 if( FFABS( p0
- q0
) < alpha
&&
6335 FFABS( p1
- p0
) < beta
&&
6336 FFABS( q1
- q0
) < beta
) {
6338 const int p3
= pix
[-4*pix_next
];
6339 const int q3
= pix
[ 3*pix_next
];
6341 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6342 if( FFABS( p2
- p0
) < beta
) {
6344 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6345 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6346 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6349 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6351 if( FFABS( q2
- q0
) < beta
) {
6353 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6354 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6355 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6358 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6362 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6363 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6365 tprintf(h
->s
.avctx
, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
6372 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6374 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6375 const int alpha
= (alpha_table
+52)[index_a
];
6376 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6381 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6382 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6384 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6388 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6389 MpegEncContext
* const s
= &h
->s
;
6390 int mb_y_firstrow
= s
->picture_structure
== PICT_BOTTOM_FIELD
;
6392 int qp
, qp0
, qp1
, qpc
, qpc0
, qpc1
, qp_thresh
;
6396 if(mb_x
==0 || mb_y
==mb_y_firstrow
|| !s
->dsp
.h264_loop_filter_strength
|| h
->pps
.chroma_qp_diff
||
6397 (h
->deblocking_filter
== 2 && (h
->slice_table
[mb_xy
] != h
->slice_table
[h
->top_mb_xy
] ||
6398 h
->slice_table
[mb_xy
] != h
->slice_table
[mb_xy
- 1]))) {
6399 filter_mb(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
);
6402 assert(!FRAME_MBAFF
);
6404 mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6405 qp
= s
->current_picture
.qscale_table
[mb_xy
];
6406 qp0
= s
->current_picture
.qscale_table
[mb_xy
-1];
6407 qp1
= s
->current_picture
.qscale_table
[h
->top_mb_xy
];
6408 qpc
= get_chroma_qp( h
, 0, qp
);
6409 qpc0
= get_chroma_qp( h
, 0, qp0
);
6410 qpc1
= get_chroma_qp( h
, 0, qp1
);
6411 qp0
= (qp
+ qp0
+ 1) >> 1;
6412 qp1
= (qp
+ qp1
+ 1) >> 1;
6413 qpc0
= (qpc
+ qpc0
+ 1) >> 1;
6414 qpc1
= (qpc
+ qpc1
+ 1) >> 1;
6415 qp_thresh
= 15 - h
->slice_alpha_c0_offset
;
6416 if(qp
<= qp_thresh
&& qp0
<= qp_thresh
&& qp1
<= qp_thresh
&&
6417 qpc
<= qp_thresh
&& qpc0
<= qp_thresh
&& qpc1
<= qp_thresh
)
6420 if( IS_INTRA(mb_type
) ) {
6421 int16_t bS4
[4] = {4,4,4,4};
6422 int16_t bS3
[4] = {3,3,3,3};
6423 int16_t *bSH
= FIELD_PICTURE
? bS3
: bS4
;
6424 if( IS_8x8DCT(mb_type
) ) {
6425 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6426 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6427 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6428 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6430 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6431 filter_mb_edgev( h
, &img_y
[4*1], linesize
, bS3
, qp
);
6432 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6433 filter_mb_edgev( h
, &img_y
[4*3], linesize
, bS3
, qp
);
6434 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6435 filter_mb_edgeh( h
, &img_y
[4*1*linesize
], linesize
, bS3
, qp
);
6436 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6437 filter_mb_edgeh( h
, &img_y
[4*3*linesize
], linesize
, bS3
, qp
);
6439 filter_mb_edgecv( h
, &img_cb
[2*0], uvlinesize
, bS4
, qpc0
);
6440 filter_mb_edgecv( h
, &img_cb
[2*2], uvlinesize
, bS3
, qpc
);
6441 filter_mb_edgecv( h
, &img_cr
[2*0], uvlinesize
, bS4
, qpc0
);
6442 filter_mb_edgecv( h
, &img_cr
[2*2], uvlinesize
, bS3
, qpc
);
6443 filter_mb_edgech( h
, &img_cb
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6444 filter_mb_edgech( h
, &img_cb
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6445 filter_mb_edgech( h
, &img_cr
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6446 filter_mb_edgech( h
, &img_cr
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6449 DECLARE_ALIGNED_8(int16_t, bS
[2][4][4]);
6450 uint64_t (*bSv
)[4] = (uint64_t(*)[4])bS
;
6452 if( IS_8x8DCT(mb_type
) && (h
->cbp
&7) == 7 ) {
6454 bSv
[0][0] = bSv
[0][2] = bSv
[1][0] = bSv
[1][2] = 0x0002000200020002ULL
;
6456 int mask_edge1
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
)) ? 3 :
6457 (mb_type
& MB_TYPE_16x8
) ? 1 : 0;
6458 int mask_edge0
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
))
6459 && (s
->current_picture
.mb_type
[mb_xy
-1] & (MB_TYPE_16x16
| MB_TYPE_8x16
))
6461 int step
= IS_8x8DCT(mb_type
) ? 2 : 1;
6462 edges
= (mb_type
& MB_TYPE_16x16
) && !(h
->cbp
& 15) ? 1 : 4;
6463 s
->dsp
.h264_loop_filter_strength( bS
, h
->non_zero_count_cache
, h
->ref_cache
, h
->mv_cache
,
6464 (h
->slice_type
== FF_B_TYPE
), edges
, step
, mask_edge0
, mask_edge1
);
6466 if( IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-1]) )
6467 bSv
[0][0] = 0x0004000400040004ULL
;
6468 if( IS_INTRA(s
->current_picture
.mb_type
[h
->top_mb_xy
]) )
6469 bSv
[1][0] = FIELD_PICTURE
? 0x0003000300030003ULL
: 0x0004000400040004ULL
;
6471 #define FILTER(hv,dir,edge)\
6472 if(bSv[dir][edge]) {\
6473 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6475 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6476 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6482 } else if( IS_8x8DCT(mb_type
) ) {
6501 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6502 MpegEncContext
* const s
= &h
->s
;
6503 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6504 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6505 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
6506 int first_vertical_edge_done
= 0;
6508 /* FIXME: A given frame may occupy more than one position in
6509 * the reference list. So ref2frm should be populated with
6510 * frame numbers, not indexes. */
6511 static const int ref2frm
[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6512 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6514 //for sufficiently low qp, filtering wouldn't do anything
6515 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6517 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX3(0, h
->pps
.chroma_qp_index_offset
[0], h
->pps
.chroma_qp_index_offset
[1]);
6518 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
6520 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
6521 && (mb_y
== 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
6527 // left mb is in picture
6528 && h
->slice_table
[mb_xy
-1] != 255
6529 // and current and left pair do not have the same interlaced type
6530 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6531 // and left mb is in the same slice if deblocking_filter == 2
6532 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6533 /* First vertical edge is different in MBAFF frames
6534 * There are 8 different bS to compute and 2 different Qp
6536 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
6537 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
6542 int mb_qp
, mbn0_qp
, mbn1_qp
;
6544 first_vertical_edge_done
= 1;
6546 if( IS_INTRA(mb_type
) )
6547 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
6549 for( i
= 0; i
< 8; i
++ ) {
6550 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
6552 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
6554 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
6555 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6556 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2] )
6563 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
6564 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
6565 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
6566 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
6567 bqp
[0] = ( get_chroma_qp( h
, 0, mb_qp
) +
6568 get_chroma_qp( h
, 0, mbn0_qp
) + 1 ) >> 1;
6569 rqp
[0] = ( get_chroma_qp( h
, 1, mb_qp
) +
6570 get_chroma_qp( h
, 1, mbn0_qp
) + 1 ) >> 1;
6571 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
6572 bqp
[1] = ( get_chroma_qp( h
, 0, mb_qp
) +
6573 get_chroma_qp( h
, 0, mbn1_qp
) + 1 ) >> 1;
6574 rqp
[1] = ( get_chroma_qp( h
, 1, mb_qp
) +
6575 get_chroma_qp( h
, 1, mbn1_qp
) + 1 ) >> 1;
6578 tprintf(s
->avctx
, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], bqp
[0], bqp
[1], rqp
[0], rqp
[1], linesize
, uvlinesize
);
6579 { int i
; for (i
= 0; i
< 8; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6580 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6581 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, bqp
);
6582 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, rqp
);
6584 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6585 for( dir
= 0; dir
< 2; dir
++ )
6588 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6589 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
6590 int start
= h
->slice_table
[mbm_xy
] == 255 ? 1 : 0;
6592 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
6593 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
6594 // how often to recheck mv-based bS when iterating between edges
6595 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
6596 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
6597 // how often to recheck mv-based bS when iterating along each edge
6598 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
6600 if (first_vertical_edge_done
) {
6602 first_vertical_edge_done
= 0;
6605 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6608 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
6609 && !IS_INTERLACED(mb_type
)
6610 && IS_INTERLACED(mbm_type
)
6612 // This is a special case in the norm where the filtering must
6613 // be done twice (one each of the field) even if we are in a
6614 // frame macroblock.
6616 static const int nnz_idx
[4] = {4,5,6,3};
6617 unsigned int tmp_linesize
= 2 * linesize
;
6618 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6619 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6624 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
6625 if( IS_INTRA(mb_type
) ||
6626 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
6627 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6629 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
6630 for( i
= 0; i
< 4; i
++ ) {
6631 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
6632 mbn_nnz
[nnz_idx
[i
]] != 0 )
6638 // Do not use s->qscale as luma quantizer because it has not the same
6639 // value in IPCM macroblocks.
6640 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6641 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6642 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6643 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
6644 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6645 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6646 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6647 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6654 for( edge
= start
; edge
< edges
; edge
++ ) {
6655 /* mbn_xy: neighbor macroblock */
6656 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6657 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
6661 if( (edge
&1) && IS_8x8DCT(mb_type
) )
6664 if( IS_INTRA(mb_type
) ||
6665 IS_INTRA(mbn_type
) ) {
6668 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
6669 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6678 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6683 if( edge
& mask_edge
) {
6684 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
6687 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
6688 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
6691 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
6692 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
6693 int bn_idx
= b_idx
- (dir
? 8:1);
6695 for( l
= 0; !v
&& l
< 1 + (h
->slice_type
== FF_B_TYPE
); l
++ ) {
6696 v
|= ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6697 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6698 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
6700 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
6706 for( i
= 0; i
< 4; i
++ ) {
6707 int x
= dir
== 0 ? edge
: i
;
6708 int y
= dir
== 0 ? i
: edge
;
6709 int b_idx
= 8 + 4 + x
+ 8*y
;
6710 int bn_idx
= b_idx
- (dir
? 8:1);
6712 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6713 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6719 for( l
= 0; l
< 1 + (h
->slice_type
== FF_B_TYPE
); l
++ ) {
6720 if( ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6721 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6722 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
6730 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6735 // Do not use s->qscale as luma quantizer because it has not the same
6736 // value in IPCM macroblocks.
6737 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6738 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6739 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6740 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6742 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6743 if( (edge
&1) == 0 ) {
6744 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
,
6745 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6746 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
,
6747 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6750 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6751 if( (edge
&1) == 0 ) {
6752 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6753 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6754 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6755 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6762 static int decode_slice(struct AVCodecContext
*avctx
, H264Context
*h
){
6763 MpegEncContext
* const s
= &h
->s
;
6764 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6768 if( h
->pps
.cabac
) {
6772 align_get_bits( &s
->gb
);
6775 ff_init_cabac_states( &h
->cabac
);
6776 ff_init_cabac_decoder( &h
->cabac
,
6777 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6778 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6779 /* calculate pre-state */
6780 for( i
= 0; i
< 460; i
++ ) {
6782 if( h
->slice_type
== FF_I_TYPE
)
6783 pre
= av_clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6785 pre
= av_clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6788 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6790 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6795 int ret
= decode_mb_cabac(h
);
6797 //STOP_TIMER("decode_mb_cabac")
6799 if(ret
>=0) hl_decode_mb(h
);
6801 if( ret
>= 0 && FRAME_MBAFF
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6804 if(ret
>=0) ret
= decode_mb_cabac(h
);
6806 if(ret
>=0) hl_decode_mb(h
);
6809 eos
= get_cabac_terminate( &h
->cabac
);
6811 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
6812 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d, bytestream (%td)\n", s
->mb_x
, s
->mb_y
, h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
6813 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6817 if( ++s
->mb_x
>= s
->mb_width
) {
6819 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6821 if(FIELD_OR_MBAFF_PICTURE
) {
6826 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6827 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6828 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6835 int ret
= decode_mb_cavlc(h
);
6837 if(ret
>=0) hl_decode_mb(h
);
6839 if(ret
>=0 && FRAME_MBAFF
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6841 ret
= decode_mb_cavlc(h
);
6843 if(ret
>=0) hl_decode_mb(h
);
6848 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6849 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6854 if(++s
->mb_x
>= s
->mb_width
){
6856 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6858 if(FIELD_OR_MBAFF_PICTURE
) {
6861 if(s
->mb_y
>= s
->mb_height
){
6862 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6864 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6865 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6869 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6876 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6877 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6878 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6879 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6883 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6892 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6893 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6894 int ret
= decode_mb(h
);
6899 av_log(s
->avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6900 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6905 if(++s
->mb_x
>= s
->mb_width
){
6907 if(++s
->mb_y
>= s
->mb_height
){
6908 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6909 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6913 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6920 if(get_bits_count(s
->?gb
) >= s
->gb
?.size_in_bits
){
6921 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6922 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6926 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6933 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6936 return -1; //not reached
6939 static int decode_unregistered_user_data(H264Context
*h
, int size
){
6940 MpegEncContext
* const s
= &h
->s
;
6941 uint8_t user_data
[16+256];
6947 for(i
=0; i
<sizeof(user_data
)-1 && i
<size
; i
++){
6948 user_data
[i
]= get_bits(&s
->gb
, 8);
6952 e
= sscanf(user_data
+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build
);
6953 if(e
==1 && build
>=0)
6954 h
->x264_build
= build
;
6956 if(s
->avctx
->debug
& FF_DEBUG_BUGS
)
6957 av_log(s
->avctx
, AV_LOG_DEBUG
, "user data:\"%s\"\n", user_data
+16);
6960 skip_bits(&s
->gb
, 8);
6965 static int decode_sei(H264Context
*h
){
6966 MpegEncContext
* const s
= &h
->s
;
6968 while(get_bits_count(&s
->gb
) + 16 < s
->gb
.size_in_bits
){
6973 type
+= show_bits(&s
->gb
, 8);
6974 }while(get_bits(&s
->gb
, 8) == 255);
6978 size
+= show_bits(&s
->gb
, 8);
6979 }while(get_bits(&s
->gb
, 8) == 255);
6983 if(decode_unregistered_user_data(h
, size
) < 0)
6987 skip_bits(&s
->gb
, 8*size
);
6990 //FIXME check bits here
6991 align_get_bits(&s
->gb
);
6997 static inline void decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
6998 MpegEncContext
* const s
= &h
->s
;
7000 cpb_count
= get_ue_golomb(&s
->gb
) + 1;
7001 get_bits(&s
->gb
, 4); /* bit_rate_scale */
7002 get_bits(&s
->gb
, 4); /* cpb_size_scale */
7003 for(i
=0; i
<cpb_count
; i
++){
7004 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
7005 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
7006 get_bits1(&s
->gb
); /* cbr_flag */
7008 get_bits(&s
->gb
, 5); /* initial_cpb_removal_delay_length_minus1 */
7009 get_bits(&s
->gb
, 5); /* cpb_removal_delay_length_minus1 */
7010 get_bits(&s
->gb
, 5); /* dpb_output_delay_length_minus1 */
7011 get_bits(&s
->gb
, 5); /* time_offset_length */
7014 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
7015 MpegEncContext
* const s
= &h
->s
;
7016 int aspect_ratio_info_present_flag
;
7017 unsigned int aspect_ratio_idc
;
7018 int nal_hrd_parameters_present_flag
, vcl_hrd_parameters_present_flag
;
7020 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
7022 if( aspect_ratio_info_present_flag
) {
7023 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
7024 if( aspect_ratio_idc
== EXTENDED_SAR
) {
7025 sps
->sar
.num
= get_bits(&s
->gb
, 16);
7026 sps
->sar
.den
= get_bits(&s
->gb
, 16);
7027 }else if(aspect_ratio_idc
< sizeof(pixel_aspect
)/sizeof(*pixel_aspect
)){
7028 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
7030 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
7037 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7039 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
7040 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
7043 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
7044 get_bits(&s
->gb
, 3); /* video_format */
7045 get_bits1(&s
->gb
); /* video_full_range_flag */
7046 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
7047 get_bits(&s
->gb
, 8); /* colour_primaries */
7048 get_bits(&s
->gb
, 8); /* transfer_characteristics */
7049 get_bits(&s
->gb
, 8); /* matrix_coefficients */
7053 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
7054 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
7055 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
7058 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
7059 if(sps
->timing_info_present_flag
){
7060 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
7061 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
7062 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
7065 nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7066 if(nal_hrd_parameters_present_flag
)
7067 decode_hrd_parameters(h
, sps
);
7068 vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7069 if(vcl_hrd_parameters_present_flag
)
7070 decode_hrd_parameters(h
, sps
);
7071 if(nal_hrd_parameters_present_flag
|| vcl_hrd_parameters_present_flag
)
7072 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
7073 get_bits1(&s
->gb
); /* pic_struct_present_flag */
7075 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
7076 if(sps
->bitstream_restriction_flag
){
7077 unsigned int num_reorder_frames
;
7078 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
7079 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
7080 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
7081 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
7082 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
7083 num_reorder_frames
= get_ue_golomb(&s
->gb
);
7084 get_ue_golomb(&s
->gb
); /*max_dec_frame_buffering*/
7086 if(num_reorder_frames
> 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7087 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal num_reorder_frames %d\n", num_reorder_frames
);
7091 sps
->num_reorder_frames
= num_reorder_frames
;
7097 static void decode_scaling_list(H264Context
*h
, uint8_t *factors
, int size
,
7098 const uint8_t *jvt_list
, const uint8_t *fallback_list
){
7099 MpegEncContext
* const s
= &h
->s
;
7100 int i
, last
= 8, next
= 8;
7101 const uint8_t *scan
= size
== 16 ? zigzag_scan
: zigzag_scan8x8
;
7102 if(!get_bits1(&s
->gb
)) /* matrix not written, we use the predicted one */
7103 memcpy(factors
, fallback_list
, size
*sizeof(uint8_t));
7105 for(i
=0;i
<size
;i
++){
7107 next
= (last
+ get_se_golomb(&s
->gb
)) & 0xff;
7108 if(!i
&& !next
){ /* matrix not written, we use the preset one */
7109 memcpy(factors
, jvt_list
, size
*sizeof(uint8_t));
7112 last
= factors
[scan
[i
]] = next
? next
: last
;
7116 static void decode_scaling_matrices(H264Context
*h
, SPS
*sps
, PPS
*pps
, int is_sps
,
7117 uint8_t (*scaling_matrix4
)[16], uint8_t (*scaling_matrix8
)[64]){
7118 MpegEncContext
* const s
= &h
->s
;
7119 int fallback_sps
= !is_sps
&& sps
->scaling_matrix_present
;
7120 const uint8_t *fallback
[4] = {
7121 fallback_sps
? sps
->scaling_matrix4
[0] : default_scaling4
[0],
7122 fallback_sps
? sps
->scaling_matrix4
[3] : default_scaling4
[1],
7123 fallback_sps
? sps
->scaling_matrix8
[0] : default_scaling8
[0],
7124 fallback_sps
? sps
->scaling_matrix8
[1] : default_scaling8
[1]
7126 if(get_bits1(&s
->gb
)){
7127 sps
->scaling_matrix_present
|= is_sps
;
7128 decode_scaling_list(h
,scaling_matrix4
[0],16,default_scaling4
[0],fallback
[0]); // Intra, Y
7129 decode_scaling_list(h
,scaling_matrix4
[1],16,default_scaling4
[0],scaling_matrix4
[0]); // Intra, Cr
7130 decode_scaling_list(h
,scaling_matrix4
[2],16,default_scaling4
[0],scaling_matrix4
[1]); // Intra, Cb
7131 decode_scaling_list(h
,scaling_matrix4
[3],16,default_scaling4
[1],fallback
[1]); // Inter, Y
7132 decode_scaling_list(h
,scaling_matrix4
[4],16,default_scaling4
[1],scaling_matrix4
[3]); // Inter, Cr
7133 decode_scaling_list(h
,scaling_matrix4
[5],16,default_scaling4
[1],scaling_matrix4
[4]); // Inter, Cb
7134 if(is_sps
|| pps
->transform_8x8_mode
){
7135 decode_scaling_list(h
,scaling_matrix8
[0],64,default_scaling8
[0],fallback
[2]); // Intra, Y
7136 decode_scaling_list(h
,scaling_matrix8
[1],64,default_scaling8
[1],fallback
[3]); // Inter, Y
7138 } else if(fallback_sps
) {
7139 memcpy(scaling_matrix4
, sps
->scaling_matrix4
, 6*16*sizeof(uint8_t));
7140 memcpy(scaling_matrix8
, sps
->scaling_matrix8
, 2*64*sizeof(uint8_t));
7145 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7148 alloc_parameter_set(H264Context
*h
, void **vec
, const unsigned int id
, const unsigned int max
,
7149 const size_t size
, const char *name
)
7152 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "%s_id (%d) out of range\n", name
, id
);
7157 vec
[id
] = av_mallocz(size
);
7159 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cannot allocate memory for %s\n", name
);
7164 static inline int decode_seq_parameter_set(H264Context
*h
){
7165 MpegEncContext
* const s
= &h
->s
;
7166 int profile_idc
, level_idc
;
7167 unsigned int sps_id
, tmp
, mb_width
, mb_height
;
7171 profile_idc
= get_bits(&s
->gb
, 8);
7172 get_bits1(&s
->gb
); //constraint_set0_flag
7173 get_bits1(&s
->gb
); //constraint_set1_flag
7174 get_bits1(&s
->gb
); //constraint_set2_flag
7175 get_bits1(&s
->gb
); //constraint_set3_flag
7176 get_bits(&s
->gb
, 4); // reserved
7177 level_idc
= get_bits(&s
->gb
, 8);
7178 sps_id
= get_ue_golomb(&s
->gb
);
7180 sps
= alloc_parameter_set(h
, (void **)h
->sps_buffers
, sps_id
, MAX_SPS_COUNT
, sizeof(SPS
), "sps");
7184 sps
->profile_idc
= profile_idc
;
7185 sps
->level_idc
= level_idc
;
7187 if(sps
->profile_idc
>= 100){ //high profile
7188 if(get_ue_golomb(&s
->gb
) == 3) //chroma_format_idc
7189 get_bits1(&s
->gb
); //residual_color_transform_flag
7190 get_ue_golomb(&s
->gb
); //bit_depth_luma_minus8
7191 get_ue_golomb(&s
->gb
); //bit_depth_chroma_minus8
7192 sps
->transform_bypass
= get_bits1(&s
->gb
);
7193 decode_scaling_matrices(h
, sps
, NULL
, 1, sps
->scaling_matrix4
, sps
->scaling_matrix8
);
7195 sps
->scaling_matrix_present
= 0;
7197 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
7198 sps
->poc_type
= get_ue_golomb(&s
->gb
);
7200 if(sps
->poc_type
== 0){ //FIXME #define
7201 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
7202 } else if(sps
->poc_type
== 1){//FIXME #define
7203 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
7204 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
7205 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
7206 tmp
= get_ue_golomb(&s
->gb
);
7208 if(tmp
>= sizeof(sps
->offset_for_ref_frame
) / sizeof(sps
->offset_for_ref_frame
[0])){
7209 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "poc_cycle_length overflow %u\n", tmp
);
7212 sps
->poc_cycle_length
= tmp
;
7214 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
7215 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
7216 }else if(sps
->poc_type
!= 2){
7217 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
7221 tmp
= get_ue_golomb(&s
->gb
);
7222 if(tmp
> MAX_PICTURE_COUNT
-2 || tmp
>= 32){
7223 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
7226 sps
->ref_frame_count
= tmp
;
7227 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
7228 mb_width
= get_ue_golomb(&s
->gb
) + 1;
7229 mb_height
= get_ue_golomb(&s
->gb
) + 1;
7230 if(mb_width
>= INT_MAX
/16 || mb_height
>= INT_MAX
/16 ||
7231 avcodec_check_dimensions(NULL
, 16*mb_width
, 16*mb_height
)){
7232 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_width/height overflow\n");
7235 sps
->mb_width
= mb_width
;
7236 sps
->mb_height
= mb_height
;
7238 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
7239 if(!sps
->frame_mbs_only_flag
)
7240 sps
->mb_aff
= get_bits1(&s
->gb
);
7244 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
7246 #ifndef ALLOW_INTERLACE
7248 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF support not included; enable it at compile-time.\n");
7250 if(!sps
->direct_8x8_inference_flag
&& sps
->mb_aff
)
7251 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF + !direct_8x8_inference is not implemented\n");
7253 sps
->crop
= get_bits1(&s
->gb
);
7255 sps
->crop_left
= get_ue_golomb(&s
->gb
);
7256 sps
->crop_right
= get_ue_golomb(&s
->gb
);
7257 sps
->crop_top
= get_ue_golomb(&s
->gb
);
7258 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
7259 if(sps
->crop_left
|| sps
->crop_top
){
7260 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7262 if(sps
->crop_right
>= 8 || sps
->crop_bottom
>= (8>> !h
->sps
.frame_mbs_only_flag
)){
7263 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "brainfart cropping not supported, this could look slightly wrong ...\n");
7269 sps
->crop_bottom
= 0;
7272 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7273 if( sps
->vui_parameters_present_flag
)
7274 decode_vui_parameters(h
, sps
);
7276 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7277 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7278 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7280 sps
->ref_frame_count
,
7281 sps
->mb_width
, sps
->mb_height
,
7282 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7283 sps
->direct_8x8_inference_flag
? "8B8" : "",
7284 sps
->crop_left
, sps
->crop_right
,
7285 sps
->crop_top
, sps
->crop_bottom
,
7286 sps
->vui_parameters_present_flag
? "VUI" : ""
7293 build_qp_table(PPS
*pps
, int t
, int index
)
7296 for(i
= 0; i
< 255; i
++)
7297 pps
->chroma_qp_table
[t
][i
& 0xff] = chroma_qp
[av_clip(i
+ index
, 0, 51)];
7300 static inline int decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7301 MpegEncContext
* const s
= &h
->s
;
7302 unsigned int tmp
, pps_id
= get_ue_golomb(&s
->gb
);
7305 pps
= alloc_parameter_set(h
, (void **)h
->pps_buffers
, pps_id
, MAX_PPS_COUNT
, sizeof(PPS
), "pps");
7309 tmp
= get_ue_golomb(&s
->gb
);
7310 if(tmp
>=MAX_SPS_COUNT
|| h
->sps_buffers
[tmp
] == NULL
){
7311 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id out of range\n");
7316 pps
->cabac
= get_bits1(&s
->gb
);
7317 pps
->pic_order_present
= get_bits1(&s
->gb
);
7318 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7319 if(pps
->slice_group_count
> 1 ){
7320 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7321 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7322 switch(pps
->mb_slice_group_map_type
){
7325 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7326 | run_length
[ i
] |1 |ue(v
) |
7331 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7333 | top_left_mb
[ i
] |1 |ue(v
) |
7334 | bottom_right_mb
[ i
] |1 |ue(v
) |
7342 | slice_group_change_direction_flag
|1 |u(1) |
7343 | slice_group_change_rate_minus1
|1 |ue(v
) |
7348 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7349 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7351 | slice_group_id
[ i
] |1 |u(v
) |
7356 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7357 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7358 if(pps
->ref_count
[0]-1 > 32-1 || pps
->ref_count
[1]-1 > 32-1){
7359 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7360 pps
->ref_count
[0]= pps
->ref_count
[1]= 1;
7364 pps
->weighted_pred
= get_bits1(&s
->gb
);
7365 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7366 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7367 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7368 pps
->chroma_qp_index_offset
[0]= get_se_golomb(&s
->gb
);
7369 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7370 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7371 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7373 pps
->transform_8x8_mode
= 0;
7374 h
->dequant_coeff_pps
= -1; //contents of sps/pps can change even if id doesn't, so reinit
7375 memset(pps
->scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
7376 memset(pps
->scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
7378 if(get_bits_count(&s
->gb
) < bit_length
){
7379 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7380 decode_scaling_matrices(h
, h
->sps_buffers
[pps
->sps_id
], pps
, 0, pps
->scaling_matrix4
, pps
->scaling_matrix8
);
7381 pps
->chroma_qp_index_offset
[1]= get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7383 pps
->chroma_qp_index_offset
[1]= pps
->chroma_qp_index_offset
[0];
7386 build_qp_table(pps
, 0, pps
->chroma_qp_index_offset
[0]);
7387 if(pps
->chroma_qp_index_offset
[0] != pps
->chroma_qp_index_offset
[1]) {
7388 build_qp_table(pps
, 1, pps
->chroma_qp_index_offset
[1]);
7389 h
->pps
.chroma_qp_diff
= 1;
7391 memcpy(pps
->chroma_qp_table
[1], pps
->chroma_qp_table
[0], 256);
7393 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7394 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7395 pps_id
, pps
->sps_id
,
7396 pps
->cabac
? "CABAC" : "CAVLC",
7397 pps
->slice_group_count
,
7398 pps
->ref_count
[0], pps
->ref_count
[1],
7399 pps
->weighted_pred
? "weighted" : "",
7400 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
[0], pps
->chroma_qp_index_offset
[1],
7401 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7402 pps
->constrained_intra_pred
? "CONSTR" : "",
7403 pps
->redundant_pic_cnt_present
? "REDU" : "",
7404 pps
->transform_8x8_mode
? "8x8DCT" : ""
7412 * Call decode_slice() for each context.
7414 * @param h h264 master context
7415 * @param context_count number of contexts to execute
7417 static void execute_decode_slices(H264Context
*h
, int context_count
){
7418 MpegEncContext
* const s
= &h
->s
;
7419 AVCodecContext
* const avctx
= s
->avctx
;
7423 if(context_count
== 1) {
7424 decode_slice(avctx
, h
);
7426 for(i
= 1; i
< context_count
; i
++) {
7427 hx
= h
->thread_context
[i
];
7428 hx
->s
.error_resilience
= avctx
->error_resilience
;
7429 hx
->s
.error_count
= 0;
7432 avctx
->execute(avctx
, (void *)decode_slice
,
7433 (void **)h
->thread_context
, NULL
, context_count
);
7435 /* pull back stuff from slices to master context */
7436 hx
= h
->thread_context
[context_count
- 1];
7437 s
->mb_x
= hx
->s
.mb_x
;
7438 s
->mb_y
= hx
->s
.mb_y
;
7439 s
->dropable
= hx
->s
.dropable
;
7440 s
->picture_structure
= hx
->s
.picture_structure
;
7441 for(i
= 1; i
< context_count
; i
++)
7442 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
7447 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7448 MpegEncContext
* const s
= &h
->s
;
7449 AVCodecContext
* const avctx
= s
->avctx
;
7451 H264Context
*hx
; ///< thread context
7452 int context_count
= 0;
7454 h
->max_contexts
= avctx
->thread_count
;
7457 for(i
=0; i
<50; i
++){
7458 av_log(NULL
, AV_LOG_ERROR
,"%02X ", buf
[i
]);
7461 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
)){
7462 h
->current_slice
= 0;
7463 if (!s
->first_field
)
7464 s
->current_picture_ptr
= NULL
;
7476 if(buf_index
>= buf_size
) break;
7478 for(i
= 0; i
< h
->nal_length_size
; i
++)
7479 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7480 if(nalsize
<= 1 || (nalsize
+buf_index
> buf_size
)){
7485 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: nal size %d\n", nalsize
);
7490 // start code prefix search
7491 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7492 // This should always succeed in the first iteration.
7493 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7497 if(buf_index
+3 >= buf_size
) break;
7502 hx
= h
->thread_context
[context_count
];
7504 ptr
= decode_nal(hx
, buf
+ buf_index
, &dst_length
, &consumed
, h
->is_avc
? nalsize
: buf_size
- buf_index
);
7505 if (ptr
==NULL
|| dst_length
< 0){
7508 while(ptr
[dst_length
- 1] == 0 && dst_length
> 0)
7510 bit_length
= !dst_length
? 0 : (8*dst_length
- decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
7512 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7513 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7516 if (h
->is_avc
&& (nalsize
!= consumed
)){
7517 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7521 buf_index
+= consumed
;
7523 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0) //FIXME do not discard SEI id
7524 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7529 switch(hx
->nal_unit_type
){
7531 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
7532 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Invalid mix of idr and non-idr slices");
7535 idr(h
); //FIXME ensure we don't loose some frames if there is reordering
7537 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7539 hx
->inter_gb_ptr
= &hx
->s
.gb
;
7540 hx
->s
.data_partitioning
= 0;
7542 if((err
= decode_slice_header(hx
, h
)))
7545 s
->current_picture_ptr
->key_frame
|= (hx
->nal_unit_type
== NAL_IDR_SLICE
);
7546 if(hx
->redundant_pic_count
==0 && hx
->s
.hurry_up
< 5
7547 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7548 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type
!=FF_B_TYPE
)
7549 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type
==FF_I_TYPE
)
7550 && avctx
->skip_frame
< AVDISCARD_ALL
)
7554 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7556 hx
->inter_gb_ptr
= NULL
;
7557 hx
->s
.data_partitioning
= 1;
7559 err
= decode_slice_header(hx
, h
);
7562 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
7563 hx
->intra_gb_ptr
= &hx
->intra_gb
;
7566 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
7567 hx
->inter_gb_ptr
= &hx
->inter_gb
;
7569 if(hx
->redundant_pic_count
==0 && hx
->intra_gb_ptr
&& hx
->s
.data_partitioning
7570 && s
->context_initialized
7572 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7573 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type
!=FF_B_TYPE
)
7574 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type
==FF_I_TYPE
)
7575 && avctx
->skip_frame
< AVDISCARD_ALL
)
7579 init_get_bits(&s
->gb
, ptr
, bit_length
);
7583 init_get_bits(&s
->gb
, ptr
, bit_length
);
7584 decode_seq_parameter_set(h
);
7586 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7589 if(avctx
->has_b_frames
< 2)
7590 avctx
->has_b_frames
= !s
->low_delay
;
7593 init_get_bits(&s
->gb
, ptr
, bit_length
);
7595 decode_picture_parameter_set(h
, bit_length
);
7599 case NAL_END_SEQUENCE
:
7600 case NAL_END_STREAM
:
7601 case NAL_FILLER_DATA
:
7603 case NAL_AUXILIARY_SLICE
:
7606 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n", h
->nal_unit_type
, bit_length
);
7609 if(context_count
== h
->max_contexts
) {
7610 execute_decode_slices(h
, context_count
);
7615 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7617 /* Slice could not be decoded in parallel mode, copy down
7618 * NAL unit stuff to context 0 and restart. Note that
7619 * rbsp_buffer is not transfered, but since we no longer
7620 * run in parallel mode this should not be an issue. */
7621 h
->nal_unit_type
= hx
->nal_unit_type
;
7622 h
->nal_ref_idc
= hx
->nal_ref_idc
;
7628 execute_decode_slices(h
, context_count
);
7633 * returns the number of bytes consumed for building the current frame
7635 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7636 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7637 pos
-= s
->parse_context
.last_index
;
7638 if(pos
<0) pos
=0; // FIXME remove (unneeded?)
7642 if(pos
==0) pos
=1; //avoid infinite loops (i doubt that is needed but ...)
7643 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7649 static int decode_frame(AVCodecContext
*avctx
,
7650 void *data
, int *data_size
,
7651 const uint8_t *buf
, int buf_size
)
7653 H264Context
*h
= avctx
->priv_data
;
7654 MpegEncContext
*s
= &h
->s
;
7655 AVFrame
*pict
= data
;
7658 s
->flags
= avctx
->flags
;
7659 s
->flags2
= avctx
->flags2
;
7661 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7662 const int next
= ff_h264_find_frame_end(h
, buf
, buf_size
);
7663 assert((buf_size
> 0) || (next
== END_NOT_FOUND
));
7665 if( ff_combine_frame(&s
->parse_context
, next
, &buf
, &buf_size
) < 0 )
7667 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7670 /* no supplementary picture */
7671 if (buf_size
== 0) {
7675 //FIXME factorize this with the output code below
7676 out
= h
->delayed_pic
[0];
7678 for(i
=1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->key_frame
; i
++)
7679 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7680 out
= h
->delayed_pic
[i
];
7684 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7685 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7688 *data_size
= sizeof(AVFrame
);
7689 *pict
= *(AVFrame
*)out
;
7695 if(h
->is_avc
&& !h
->got_avcC
) {
7696 int i
, cnt
, nalsize
;
7697 unsigned char *p
= avctx
->extradata
;
7698 if(avctx
->extradata_size
< 7) {
7699 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7703 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7706 /* sps and pps in the avcC always have length coded with 2 bytes,
7707 so put a fake nal_length_size = 2 while parsing them */
7708 h
->nal_length_size
= 2;
7709 // Decode sps from avcC
7710 cnt
= *(p
+5) & 0x1f; // Number of sps
7712 for (i
= 0; i
< cnt
; i
++) {
7713 nalsize
= AV_RB16(p
) + 2;
7714 if(decode_nal_units(h
, p
, nalsize
) < 0) {
7715 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7720 // Decode pps from avcC
7721 cnt
= *(p
++); // Number of pps
7722 for (i
= 0; i
< cnt
; i
++) {
7723 nalsize
= AV_RB16(p
) + 2;
7724 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7725 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7730 // Now store right nal length size, that will be use to parse all other nals
7731 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7732 // Do not reparse avcC
7736 if(avctx
->frame_number
==0 && !h
->is_avc
&& s
->avctx
->extradata_size
){
7737 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7741 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7745 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
){
7746 if (avctx
->skip_frame
>= AVDISCARD_NONREF
|| s
->hurry_up
) return 0;
7747 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
7751 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) || (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)){
7752 Picture
*out
= s
->current_picture_ptr
;
7753 Picture
*cur
= s
->current_picture_ptr
;
7754 Picture
*prev
= h
->delayed_output_pic
;
7755 int i
, pics
, cross_idr
, out_of_order
, out_idx
;
7759 s
->current_picture_ptr
->qscale_type
= FF_QSCALE_TYPE_H264
;
7760 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
7762 h
->prev_frame_num_offset
= h
->frame_num_offset
;
7763 h
->prev_frame_num
= h
->frame_num
;
7765 h
->prev_poc_msb
= h
->poc_msb
;
7766 h
->prev_poc_lsb
= h
->poc_lsb
;
7767 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
7771 * FIXME: Error handling code does not seem to support interlaced
7772 * when slices span multiple rows
7773 * The ff_er_add_slice calls don't work right for bottom
7774 * fields; they cause massive erroneous error concealing
7775 * Error marking covers both fields (top and bottom).
7776 * This causes a mismatched s->error_count
7777 * and a bad error table. Further, the error count goes to
7778 * INT_MAX when called for bottom field, because mb_y is
7779 * past end by one (callers fault) and resync_mb_y != 0
7780 * causes problems for the first MB line, too.
7787 if (s
->first_field
) {
7788 /* Wait for second field. */
7792 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7793 /* Derive top_field_first from field pocs. */
7794 cur
->top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
7796 //FIXME do something with unavailable reference frames
7798 #if 0 //decode order
7799 *data_size
= sizeof(AVFrame
);
7801 /* Sort B-frames into display order */
7803 if(h
->sps
.bitstream_restriction_flag
7804 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7805 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7810 while(h
->delayed_pic
[pics
]) pics
++;
7812 assert(pics
+1 < sizeof(h
->delayed_pic
) / sizeof(h
->delayed_pic
[0]));
7814 h
->delayed_pic
[pics
++] = cur
;
7815 if(cur
->reference
== 0)
7816 cur
->reference
= DELAYED_PIC_REF
;
7819 for(i
=0; h
->delayed_pic
[i
]; i
++)
7820 if(h
->delayed_pic
[i
]->key_frame
|| h
->delayed_pic
[i
]->poc
==0)
7823 out
= h
->delayed_pic
[0];
7825 for(i
=1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->key_frame
; i
++)
7826 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7827 out
= h
->delayed_pic
[i
];
7831 out_of_order
= !cross_idr
&& prev
&& out
->poc
< prev
->poc
;
7832 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
7834 else if(prev
&& pics
<= s
->avctx
->has_b_frames
)
7836 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& pics
< 15)
7838 ((!cross_idr
&& prev
&& out
->poc
> prev
->poc
+ 2)
7839 || cur
->pict_type
== FF_B_TYPE
)))
7842 s
->avctx
->has_b_frames
++;
7845 else if(out_of_order
)
7848 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7849 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7850 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7856 *data_size
= sizeof(AVFrame
);
7857 if(prev
&& prev
!= out
&& prev
->reference
== DELAYED_PIC_REF
)
7858 prev
->reference
= 0;
7859 h
->delayed_output_pic
= out
;
7863 *pict
= *(AVFrame
*)out
;
7865 av_log(avctx
, AV_LOG_DEBUG
, "no picture\n");
7869 assert(pict
->data
[0] || !*data_size
);
7870 ff_print_debug_info(s
, pict
);
7871 //printf("out %d\n", (int)pict->data[0]);
7874 /* Return the Picture timestamp as the frame number */
7875 /* we subtract 1 because it is added on utils.c */
7876 avctx
->frame_number
= s
->picture_number
- 1;
7878 return get_consumed_bytes(s
, buf_index
, buf_size
);
7881 static inline void fill_mb_avail(H264Context
*h
){
7882 MpegEncContext
* const s
= &h
->s
;
7883 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7886 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7887 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7888 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7894 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7895 h
->mb_avail
[4]= 1; //FIXME move out
7896 h
->mb_avail
[5]= 0; //FIXME move out
7904 #define SIZE (COUNT*40)
7910 // int int_temp[10000];
7912 AVCodecContext avctx
;
7914 dsputil_init(&dsp
, &avctx
);
7916 init_put_bits(&pb
, temp
, SIZE
);
7917 printf("testing unsigned exp golomb\n");
7918 for(i
=0; i
<COUNT
; i
++){
7920 set_ue_golomb(&pb
, i
);
7921 STOP_TIMER("set_ue_golomb");
7923 flush_put_bits(&pb
);
7925 init_get_bits(&gb
, temp
, 8*SIZE
);
7926 for(i
=0; i
<COUNT
; i
++){
7929 s
= show_bits(&gb
, 24);
7932 j
= get_ue_golomb(&gb
);
7934 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7937 STOP_TIMER("get_ue_golomb");
7941 init_put_bits(&pb
, temp
, SIZE
);
7942 printf("testing signed exp golomb\n");
7943 for(i
=0; i
<COUNT
; i
++){
7945 set_se_golomb(&pb
, i
- COUNT
/2);
7946 STOP_TIMER("set_se_golomb");
7948 flush_put_bits(&pb
);
7950 init_get_bits(&gb
, temp
, 8*SIZE
);
7951 for(i
=0; i
<COUNT
; i
++){
7954 s
= show_bits(&gb
, 24);
7957 j
= get_se_golomb(&gb
);
7958 if(j
!= i
- COUNT
/2){
7959 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7962 STOP_TIMER("get_se_golomb");
7966 printf("testing 4x4 (I)DCT\n");
7969 uint8_t src
[16], ref
[16];
7970 uint64_t error
= 0, max_error
=0;
7972 for(i
=0; i
<COUNT
; i
++){
7974 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7975 for(j
=0; j
<16; j
++){
7976 ref
[j
]= random()%255;
7977 src
[j
]= random()%255;
7980 h264_diff_dct_c(block
, src
, ref
, 4);
7983 for(j
=0; j
<16; j
++){
7984 // printf("%d ", block[j]);
7985 block
[j
]= block
[j
]*4;
7986 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
7987 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
7991 s
->dsp
.h264_idct_add(ref
, block
, 4);
7992 /* for(j=0; j<16; j++){
7993 printf("%d ", ref[j]);
7997 for(j
=0; j
<16; j
++){
7998 int diff
= FFABS(src
[j
] - ref
[j
]);
8001 max_error
= FFMAX(max_error
, diff
);
8004 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
8005 printf("testing quantizer\n");
8006 for(qp
=0; qp
<52; qp
++){
8008 src1_block
[i
]= src2_block
[i
]= random()%255;
8011 printf("Testing NAL layer\n");
8013 uint8_t bitstream
[COUNT
];
8014 uint8_t nal
[COUNT
*2];
8016 memset(&h
, 0, sizeof(H264Context
));
8018 for(i
=0; i
<COUNT
; i
++){
8026 for(j
=0; j
<COUNT
; j
++){
8027 bitstream
[j
]= (random() % 255) + 1;
8030 for(j
=0; j
<zeros
; j
++){
8031 int pos
= random() % COUNT
;
8032 while(bitstream
[pos
] == 0){
8041 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
8043 printf("encoding failed\n");
8047 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
8051 if(out_length
!= COUNT
){
8052 printf("incorrect length %d %d\n", out_length
, COUNT
);
8056 if(consumed
!= nal_length
){
8057 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
8061 if(memcmp(bitstream
, out
, COUNT
)){
8062 printf("mismatch\n");
8068 printf("Testing RBSP\n");
8076 static av_cold
int decode_end(AVCodecContext
*avctx
)
8078 H264Context
*h
= avctx
->priv_data
;
8079 MpegEncContext
*s
= &h
->s
;
8081 av_freep(&h
->rbsp_buffer
[0]);
8082 av_freep(&h
->rbsp_buffer
[1]);
8083 free_tables(h
); //FIXME cleanup init stuff perhaps
8086 // memset(h, 0, sizeof(H264Context));
8092 AVCodec h264_decoder
= {
8096 sizeof(H264Context
),
8101 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_TRUNCATED
| CODEC_CAP_DELAY
,
8103 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),